[llvm] 8bd0e94 - Revert "Transform vector SET{LE/ULT/ULE} -> SETLT and SET{GE/UGT/UGE} -> SETGT if possible"

Noah Goldstein via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 15 14:33:54 PST 2023


Author: Noah Goldstein
Date: 2023-02-15T16:33:38-06:00
New Revision: 8bd0e9481cfcba53946433011d841280fd456caa

URL: https://github.com/llvm/llvm-project/commit/8bd0e9481cfcba53946433011d841280fd456caa
DIFF: https://github.com/llvm/llvm-project/commit/8bd0e9481cfcba53946433011d841280fd456caa.diff

LOG: Revert "Transform vector SET{LE/ULT/ULE} -> SETLT and SET{GE/UGT/UGE} -> SETGT if possible"

This reverts commit f3732c2b18df305a1927b9d4a94610421a2750e7.
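
For context: the reverted combine rewrote unsigned vector compares as strict signed
compares when the constant operand could be adjusted by one without signed wrap and
the sign bits of both operands were known equal. A minimal sketch of the affected
pattern (illustrative only, not part of the commit) mirroring the popcount tests
updated below:

; ctpop of an i8 is at most 8, so the sign bit of %p is known zero and the
; unsigned compare can legally be treated as signed.
define <16 x i1> @ugt_2_example(<16 x i8> %x) {
  %p = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x)
  ; With f3732c2b applied, this ugt lowered to a single signed PCMPGTB
  ; against the splat constant 2; after this revert the pre-existing
  ; PMAXUB+PCMPEQB / vpcmpnleub lowerings return, as the test diffs show.
  %c = icmp ugt <16 x i8> %p, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i1> %c
}
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)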

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
    llvm/test/CodeGen/X86/vector-compare-simplify.ll
    llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
    llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll
    llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 25054b769482..50f0ef63dc7e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24725,8 +24725,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
 /// incremented or decremented. If incrementing or decrementing would result in
 /// unsigned overflow or underflow or this is not a simple vector constant,
 /// return an empty value.
-static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc,
-                                    bool NSW) {
+static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
   auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
   if (!BV)
     return SDValue();
@@ -24745,9 +24744,6 @@ static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc,
     const APInt &EltC = Elt->getAPIntValue();
     if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isZero()))
       return SDValue();
-    if (NSW && ((IsInc && EltC.isMaxSignedValue()) ||
-                (!IsInc && EltC.isMinSignedValue())))
-      return SDValue();
 
     NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
   }
@@ -24781,8 +24777,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
     // Only do this pre-AVX since vpcmp* is no longer destructive.
     if (Subtarget.hasAVX())
       return SDValue();
-    SDValue ULEOp1 =
-        incDecVectorConstant(Op1, DAG, /*IsInc*/ false, /*NSW*/ false);
+    SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false);
     if (!ULEOp1)
       return SDValue();
     Op1 = ULEOp1;
@@ -24793,8 +24788,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
     // This is beneficial because materializing a constant 0 for the PCMPEQ is
     // probably cheaper than XOR+PCMPGT using 2 different vector constants:
     // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
-    SDValue UGEOp1 =
-        incDecVectorConstant(Op1, DAG, /*IsInc*/ true, /*NSW*/ false);
+    SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true);
     if (!UGEOp1)
       return SDValue();
     Op1 = Op0;
@@ -25087,16 +25081,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     // condition to avoid an invert.
     if (Cond == ISD::SETUGT) {
       // X > C --> X >= (C+1) --> X == umax(X, C+1)
-      if (SDValue UGTOp1 =
-              incDecVectorConstant(Op1, DAG, /*IsInc*/ true, /*NSW*/ false)) {
+      if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true)) {
         Op1 = UGTOp1;
         Cond = ISD::SETUGE;
       }
     }
     if (Cond == ISD::SETULT) {
       // X < C --> X <= (C-1) --> X == umin(X, C-1)
-      if (SDValue ULTOp1 =
-              incDecVectorConstant(Op1, DAG, /*IsInc*/ false, /*NSW*/ false)) {
+      if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false)) {
         Op1 = ULTOp1;
         Cond = ISD::SETULE;
       }
@@ -53897,25 +53889,6 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
-/// pre-promote its result type since vXi1 vectors don't get promoted
-/// during type legalization.
-/// NOTE: The element count check is to ignore operand types that need to
-/// go through type promotion to a 128-bit vector.
-static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS,
-                                        SDValue RHS, ISD::CondCode CC, SDLoc DL,
-                                        SelectionDAG &DAG,
-                                        const X86Subtarget &Subtarget) {
-  if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
-      VT.getVectorElementType() == MVT::i1 &&
-      (OpVT.getVectorElementType() == MVT::i8 ||
-       OpVT.getVectorElementType() == MVT::i16)) {
-    SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
-    return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
-  }
-  return SDValue();
-}
-
 static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
@@ -54051,79 +54024,19 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Try to make an unsigned vector comparison signed. On pre-AVX512 targets
-  // there are only signed comparisons (`PCMPGT`), and on AVX512 it's often
-  // better to use `PCMPGT` if the result is meant to stay in a vector (and if
-  // it's going to a mask, there are signed AVX512 comparisons).
-  if (VT.isVector() && OpVT.isVector() && OpVT.isInteger()) {
-    bool CanMakeSigned = false;
-    if (ISD::isUnsignedIntSetCC(CC)) {
-      KnownBits CmpKnown = KnownBits::commonBits(DAG.computeKnownBits(LHS),
-                                                 DAG.computeKnownBits(RHS));
-      // If we know LHS/RHS share the same sign bit at each element we can
-      // make this signed.
-      // NOTE: `computeKnownBits` on a vector type aggregates common bits
-      // across all lanes. So a pattern where the sign varies from lane to
-      // lane, but at each lane Sign(LHS) is known to equal Sign(RHS), will be
-      // missed. We could get around this by demanding each lane
-      // independently, but this isn't the most important optimization and
-      // that may eat into compile time.
-      CanMakeSigned =
-          CmpKnown.Zero.isSignBitSet() || CmpKnown.One.isSignBitSet();
-    }
-    if (CanMakeSigned || ISD::isSignedIntSetCC(CC)) {
-      SDValue LHSOut = LHS;
-      SDValue RHSOut = RHS;
-      ISD::CondCode NewCC = CC;
-      switch (CC) {
-      case ISD::SETGE:
-      case ISD::SETUGE:
-        if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ true,
-                                                  /*NSW*/ true))
-          LHSOut = NewLHS;
-        else if (SDValue NewRHS = incDecVectorConstant(
-                     RHS, DAG, /*IsInc*/ false, /*NSW*/ true))
-          RHSOut = NewRHS;
-        else
-          break;
-
-        [[fallthrough]];
-      case ISD::SETUGT:
-        NewCC = ISD::SETGT;
-        break;
-
-      case ISD::SETLE:
-      case ISD::SETULE:
-        if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ false,
-                                                  /*NSW*/ true))
-          LHSOut = NewLHS;
-        else if (SDValue NewRHS = incDecVectorConstant(RHS, DAG, /*IsInc*/ true,
-                                                       /*NSW*/ true))
-          RHSOut = NewRHS;
-        else
-          break;
-
-        [[fallthrough]];
-      case ISD::SETULT:
-        // Will be swapped to SETGT in LowerVSETCC*.
-        NewCC = ISD::SETLT;
-        break;
-      default:
-        break;
-      }
-      if (NewCC != CC) {
-        if (SDValue R = truncateAVX512SetCCNoBWI(VT, OpVT, LHSOut, RHSOut,
-                                                 NewCC, DL, DAG, Subtarget))
-          return R;
-        return DAG.getSetCC(DL, VT, LHSOut, RHSOut, NewCC);
-      }
-    }
+  // If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
+  // pre-promote its result type since vXi1 vectors don't get promoted
+  // during type legalization.
+  // NOTE: The element count check is to ignore operand types that need to
+  // go through type promotion to a 128-bit vector.
+  if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
+      VT.getVectorElementType() == MVT::i1 &&
+      (OpVT.getVectorElementType() == MVT::i8 ||
+       OpVT.getVectorElementType() == MVT::i16)) {
+    SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
   }
 
-  if (SDValue R =
-          truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget))
-    return R;
-
   // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
   // to avoid scalarization via legalization because v4i32 is not a legal type.
   if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&

diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
index a3c7c409a85b..2092de25d5d0 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -246,7 +246,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; AVX512VL-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpternlogd $200, %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX512VL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
 ; AVX512VL-NEXT:    kshiftrw $1, %k0, %k1
 ; AVX512VL-NEXT:    kmovw %k1, %edx
 ; AVX512VL-NEXT:    kshiftrw $2, %k0, %k1

diff --git a/llvm/test/CodeGen/X86/vector-compare-simplify.ll b/llvm/test/CodeGen/X86/vector-compare-simplify.ll
index ed7cbe3658bd..f1a6a44228b9 100644
--- a/llvm/test/CodeGen/X86/vector-compare-simplify.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-simplify.ll
@@ -36,7 +36,9 @@ define <4 x i32> @sgt_min(<4 x i32> %x) {
 define <4 x i32> @sle_min(<4 x i32> %x) {
 ; CHECK-LABEL: sle_min:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %cmp = icmp sle <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
   %r = sext <4 x i1> %cmp to <4 x i32>
@@ -78,7 +80,10 @@ define <4 x i32> @slt_max(<4 x i32> %x) {
 define <4 x i32> @sge_max(<4 x i32> %x) {
 ; CHECK-LABEL: sge_max:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %cmp = icmp sge <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
   %r = sext <4 x i1> %cmp to <4 x i32>
@@ -192,7 +197,10 @@ define <4 x i32> @slt_min_plus1(<4 x i32> %x) {
 define <4 x i32> @sge_min_plus1(<4 x i32> %x) {
 ; CHECK-LABEL: sge_min_plus1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483649,2147483649,2147483649,2147483649]
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %cmp = icmp sge <4 x i32> %x, <i32 -2147483647, i32 -2147483647, i32 -2147483647, i32 -2147483647>
   %r = sext <4 x i1> %cmp to <4 x i32>
@@ -212,9 +220,9 @@ define <4 x i32> @sgt_max_minus1(<4 x i32> %x) {
 define <4 x i32> @sle_max_minus1(<4 x i32> %x) {
 ; CHECK-LABEL: sle_max_minus1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
-; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %cmp = icmp sle <4 x i32> %x, <i32 2147483646, i32 2147483646, i32 2147483646, i32 2147483646>
   %r = sext <4 x i1> %cmp to <4 x i32>

diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
index d182e126bc10..48189179974d 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
@@ -76,7 +76,8 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ugt_1_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ugt <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -115,8 +116,8 @@ define <16 x i8> @ult_2_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ult_2_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; BITALG-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ult <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
@@ -165,34 +166,36 @@ define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) {
 ;
 ; SSSE3-LABEL: ugt_2_v16i8:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm3
-; SSSE3-NEXT:    pand %xmm2, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm4
-; SSSE3-NEXT:    pshufb %xmm3, %xmm4
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    pand %xmm1, %xmm2
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT:    movdqa %xmm3, %xmm4
+; SSSE3-NEXT:    pshufb %xmm2, %xmm4
 ; SSSE3-NEXT:    psrlw $4, %xmm0
-; SSSE3-NEXT:    pand %xmm2, %xmm0
-; SSSE3-NEXT:    pshufb %xmm0, %xmm1
-; SSSE3-NEXT:    paddb %xmm4, %xmm1
-; SSSE3-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    pshufb %xmm0, %xmm3
+; SSSE3-NEXT:    paddb %xmm4, %xmm3
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ugt_2_v16i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE41-NEXT:    movdqa %xmm0, %xmm3
-; SSE41-NEXT:    pand %xmm2, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSE41-NEXT:    movdqa %xmm1, %xmm4
-; SSE41-NEXT:    pshufb %xmm3, %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT:    movdqa %xmm3, %xmm4
+; SSE41-NEXT:    pshufb %xmm2, %xmm4
 ; SSE41-NEXT:    psrlw $4, %xmm0
-; SSE41-NEXT:    pand %xmm2, %xmm0
-; SSE41-NEXT:    pshufb %xmm0, %xmm1
-; SSE41-NEXT:    paddb %xmm4, %xmm1
-; SSE41-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pshufb %xmm0, %xmm3
+; SSE41-NEXT:    paddb %xmm4, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; SSE41-NEXT:    pmaxub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ugt_2_v16i8:
@@ -205,7 +208,8 @@ define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ugt_2_v16i8:
@@ -218,7 +222,8 @@ define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i8:
@@ -250,7 +255,8 @@ define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ugt_2_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ugt <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
@@ -311,8 +317,9 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) {
 ; SSSE3-NEXT:    pand %xmm1, %xmm0
 ; SSSE3-NEXT:    pshufb %xmm0, %xmm3
 ; SSSE3-NEXT:    paddb %xmm4, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; SSSE3-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; SSSE3-NEXT:    pminub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ult_3_v16i8:
@@ -327,8 +334,9 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) {
 ; SSE41-NEXT:    pand %xmm1, %xmm0
 ; SSE41-NEXT:    pshufb %xmm0, %xmm3
 ; SSE41-NEXT:    paddb %xmm4, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; SSE41-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; SSE41-NEXT:    pminub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ult_3_v16i8:
@@ -341,8 +349,8 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ult_3_v16i8:
@@ -355,8 +363,8 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_3_v16i8:
@@ -391,8 +399,8 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ult_3_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; BITALG-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ult <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -441,34 +449,36 @@ define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) {
 ;
 ; SSSE3-LABEL: ugt_3_v16i8:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm3
-; SSSE3-NEXT:    pand %xmm2, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm4
-; SSSE3-NEXT:    pshufb %xmm3, %xmm4
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    pand %xmm1, %xmm2
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT:    movdqa %xmm3, %xmm4
+; SSSE3-NEXT:    pshufb %xmm2, %xmm4
 ; SSSE3-NEXT:    psrlw $4, %xmm0
-; SSSE3-NEXT:    pand %xmm2, %xmm0
-; SSSE3-NEXT:    pshufb %xmm0, %xmm1
-; SSSE3-NEXT:    paddb %xmm4, %xmm1
-; SSSE3-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    pshufb %xmm0, %xmm3
+; SSSE3-NEXT:    paddb %xmm4, %xmm3
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ugt_3_v16i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE41-NEXT:    movdqa %xmm0, %xmm3
-; SSE41-NEXT:    pand %xmm2, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSE41-NEXT:    movdqa %xmm1, %xmm4
-; SSE41-NEXT:    pshufb %xmm3, %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT:    movdqa %xmm3, %xmm4
+; SSE41-NEXT:    pshufb %xmm2, %xmm4
 ; SSE41-NEXT:    psrlw $4, %xmm0
-; SSE41-NEXT:    pand %xmm2, %xmm0
-; SSE41-NEXT:    pshufb %xmm0, %xmm1
-; SSE41-NEXT:    paddb %xmm4, %xmm1
-; SSE41-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pshufb %xmm0, %xmm3
+; SSE41-NEXT:    paddb %xmm4, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; SSE41-NEXT:    pmaxub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ugt_3_v16i8:
@@ -481,7 +491,8 @@ define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ugt_3_v16i8:
@@ -494,7 +505,8 @@ define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i8:
@@ -526,7 +538,8 @@ define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ugt_3_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ugt <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -587,8 +600,9 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) {
 ; SSSE3-NEXT:    pand %xmm1, %xmm0
 ; SSSE3-NEXT:    pshufb %xmm0, %xmm3
 ; SSSE3-NEXT:    paddb %xmm4, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; SSSE3-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; SSSE3-NEXT:    pminub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ult_4_v16i8:
@@ -603,8 +617,9 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) {
 ; SSE41-NEXT:    pand %xmm1, %xmm0
 ; SSE41-NEXT:    pshufb %xmm0, %xmm3
 ; SSE41-NEXT:    paddb %xmm4, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; SSE41-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; SSE41-NEXT:    pminub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ult_4_v16i8:
@@ -617,8 +632,8 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ult_4_v16i8:
@@ -631,8 +646,8 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_4_v16i8:
@@ -667,8 +682,8 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ult_4_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; BITALG-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ult <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -717,34 +732,36 @@ define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) {
 ;
 ; SSSE3-LABEL: ugt_4_v16i8:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm3
-; SSSE3-NEXT:    pand %xmm2, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm4
-; SSSE3-NEXT:    pshufb %xmm3, %xmm4
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    pand %xmm1, %xmm2
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT:    movdqa %xmm3, %xmm4
+; SSSE3-NEXT:    pshufb %xmm2, %xmm4
 ; SSSE3-NEXT:    psrlw $4, %xmm0
-; SSSE3-NEXT:    pand %xmm2, %xmm0
-; SSSE3-NEXT:    pshufb %xmm0, %xmm1
-; SSSE3-NEXT:    paddb %xmm4, %xmm1
-; SSSE3-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    pshufb %xmm0, %xmm3
+; SSSE3-NEXT:    paddb %xmm4, %xmm3
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ugt_4_v16i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE41-NEXT:    movdqa %xmm0, %xmm3
-; SSE41-NEXT:    pand %xmm2, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSE41-NEXT:    movdqa %xmm1, %xmm4
-; SSE41-NEXT:    pshufb %xmm3, %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT:    movdqa %xmm3, %xmm4
+; SSE41-NEXT:    pshufb %xmm2, %xmm4
 ; SSE41-NEXT:    psrlw $4, %xmm0
-; SSE41-NEXT:    pand %xmm2, %xmm0
-; SSE41-NEXT:    pshufb %xmm0, %xmm1
-; SSE41-NEXT:    paddb %xmm4, %xmm1
-; SSE41-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pshufb %xmm0, %xmm3
+; SSE41-NEXT:    paddb %xmm4, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; SSE41-NEXT:    pmaxub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ugt_4_v16i8:
@@ -757,7 +774,8 @@ define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ugt_4_v16i8:
@@ -770,7 +788,8 @@ define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i8:
@@ -802,7 +821,8 @@ define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ugt_4_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ugt <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -863,8 +883,9 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) {
 ; SSSE3-NEXT:    pand %xmm1, %xmm0
 ; SSSE3-NEXT:    pshufb %xmm0, %xmm3
 ; SSSE3-NEXT:    paddb %xmm4, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; SSSE3-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; SSSE3-NEXT:    pminub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ult_5_v16i8:
@@ -879,8 +900,9 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) {
 ; SSE41-NEXT:    pand %xmm1, %xmm0
 ; SSE41-NEXT:    pshufb %xmm0, %xmm3
 ; SSE41-NEXT:    paddb %xmm4, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; SSE41-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; SSE41-NEXT:    pminub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ult_5_v16i8:
@@ -893,8 +915,8 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ult_5_v16i8:
@@ -907,8 +929,8 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_5_v16i8:
@@ -943,8 +965,8 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ult_5_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; BITALG-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ult <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
@@ -993,34 +1015,36 @@ define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) {
 ;
 ; SSSE3-LABEL: ugt_5_v16i8:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm3
-; SSSE3-NEXT:    pand %xmm2, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm4
-; SSSE3-NEXT:    pshufb %xmm3, %xmm4
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    pand %xmm1, %xmm2
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT:    movdqa %xmm3, %xmm4
+; SSSE3-NEXT:    pshufb %xmm2, %xmm4
 ; SSSE3-NEXT:    psrlw $4, %xmm0
-; SSSE3-NEXT:    pand %xmm2, %xmm0
-; SSSE3-NEXT:    pshufb %xmm0, %xmm1
-; SSSE3-NEXT:    paddb %xmm4, %xmm1
-; SSSE3-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    pshufb %xmm0, %xmm3
+; SSSE3-NEXT:    paddb %xmm4, %xmm3
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ugt_5_v16i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE41-NEXT:    movdqa %xmm0, %xmm3
-; SSE41-NEXT:    pand %xmm2, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSE41-NEXT:    movdqa %xmm1, %xmm4
-; SSE41-NEXT:    pshufb %xmm3, %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT:    movdqa %xmm3, %xmm4
+; SSE41-NEXT:    pshufb %xmm2, %xmm4
 ; SSE41-NEXT:    psrlw $4, %xmm0
-; SSE41-NEXT:    pand %xmm2, %xmm0
-; SSE41-NEXT:    pshufb %xmm0, %xmm1
-; SSE41-NEXT:    paddb %xmm4, %xmm1
-; SSE41-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pshufb %xmm0, %xmm3
+; SSE41-NEXT:    paddb %xmm4, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; SSE41-NEXT:    pmaxub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ugt_5_v16i8:
@@ -1033,7 +1057,8 @@ define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ugt_5_v16i8:
@@ -1046,7 +1071,8 @@ define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i8:
@@ -1078,7 +1104,8 @@ define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ugt_5_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ugt <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
@@ -1139,8 +1166,9 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) {
 ; SSSE3-NEXT:    pand %xmm1, %xmm0
 ; SSSE3-NEXT:    pshufb %xmm0, %xmm3
 ; SSSE3-NEXT:    paddb %xmm4, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; SSSE3-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; SSSE3-NEXT:    pminub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ult_6_v16i8:
@@ -1155,8 +1183,9 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) {
 ; SSE41-NEXT:    pand %xmm1, %xmm0
 ; SSE41-NEXT:    pshufb %xmm0, %xmm3
 ; SSE41-NEXT:    paddb %xmm4, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; SSE41-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; SSE41-NEXT:    pminub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ult_6_v16i8:
@@ -1169,8 +1198,8 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ult_6_v16i8:
@@ -1183,8 +1212,8 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_6_v16i8:
@@ -1219,8 +1248,8 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ult_6_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; BITALG-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ult <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
@@ -1269,34 +1298,36 @@ define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) {
 ;
 ; SSSE3-LABEL: ugt_6_v16i8:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm3
-; SSSE3-NEXT:    pand %xmm2, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm4
-; SSSE3-NEXT:    pshufb %xmm3, %xmm4
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    pand %xmm1, %xmm2
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT:    movdqa %xmm3, %xmm4
+; SSSE3-NEXT:    pshufb %xmm2, %xmm4
 ; SSSE3-NEXT:    psrlw $4, %xmm0
-; SSSE3-NEXT:    pand %xmm2, %xmm0
-; SSSE3-NEXT:    pshufb %xmm0, %xmm1
-; SSSE3-NEXT:    paddb %xmm4, %xmm1
-; SSSE3-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    pshufb %xmm0, %xmm3
+; SSSE3-NEXT:    paddb %xmm4, %xmm3
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ugt_6_v16i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE41-NEXT:    movdqa %xmm0, %xmm3
-; SSE41-NEXT:    pand %xmm2, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; SSE41-NEXT:    movdqa %xmm1, %xmm4
-; SSE41-NEXT:    pshufb %xmm3, %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT:    movdqa %xmm3, %xmm4
+; SSE41-NEXT:    pshufb %xmm2, %xmm4
 ; SSE41-NEXT:    psrlw $4, %xmm0
-; SSE41-NEXT:    pand %xmm2, %xmm0
-; SSE41-NEXT:    pshufb %xmm0, %xmm1
-; SSE41-NEXT:    paddb %xmm4, %xmm1
-; SSE41-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pshufb %xmm0, %xmm3
+; SSE41-NEXT:    paddb %xmm4, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; SSE41-NEXT:    pmaxub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ugt_6_v16i8:
@@ -1309,7 +1340,8 @@ define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ugt_6_v16i8:
@@ -1322,7 +1354,8 @@ define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i8:
@@ -1354,7 +1387,8 @@ define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ugt_6_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ugt <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
@@ -1415,8 +1449,9 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) {
 ; SSSE3-NEXT:    pand %xmm1, %xmm0
 ; SSSE3-NEXT:    pshufb %xmm0, %xmm3
 ; SSSE3-NEXT:    paddb %xmm4, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; SSSE3-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; SSSE3-NEXT:    pminub %xmm3, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: ult_7_v16i8:
@@ -1431,8 +1466,9 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) {
 ; SSE41-NEXT:    pand %xmm1, %xmm0
 ; SSE41-NEXT:    pshufb %xmm0, %xmm3
 ; SSE41-NEXT:    paddb %xmm4, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; SSE41-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; SSE41-NEXT:    pminub %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: ult_7_v16i8:
@@ -1445,8 +1481,8 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ult_7_v16i8:
@@ -1459,8 +1495,8 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) {
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_7_v16i8:
@@ -1495,8 +1531,8 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) {
 ; BITALG-LABEL: ult_7_v16i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; BITALG-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
   %3 = icmp ult <16 x i8> %2, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -1569,7 +1605,8 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_1_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1608,8 +1645,8 @@ define <8 x i16> @ult_2_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_2_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
@@ -1763,7 +1800,8 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_2_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
@@ -1926,8 +1964,8 @@ define <8 x i16> @ult_3_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_3_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -2081,7 +2119,8 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_3_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -2244,8 +2283,8 @@ define <8 x i16> @ult_4_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_4_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
@@ -2399,7 +2438,8 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_4_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
@@ -2562,8 +2602,8 @@ define <8 x i16> @ult_5_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_5_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -2717,7 +2757,8 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_5_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -2880,8 +2921,8 @@ define <8 x i16> @ult_6_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_6_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
@@ -3035,7 +3076,8 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_6_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
@@ -3198,8 +3240,8 @@ define <8 x i16> @ult_7_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_7_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
@@ -3353,7 +3395,8 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_7_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
@@ -3516,8 +3559,8 @@ define <8 x i16> @ult_8_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_8_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -3671,7 +3714,8 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_8_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -3834,8 +3878,8 @@ define <8 x i16> @ult_9_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_9_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -3989,7 +4033,8 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_9_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -4152,8 +4197,8 @@ define <8 x i16> @ult_10_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_10_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
@@ -4307,7 +4352,8 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_10_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
@@ -4470,8 +4516,8 @@ define <8 x i16> @ult_11_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_11_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
@@ -4625,7 +4671,8 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_11_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
@@ -4788,8 +4835,8 @@ define <8 x i16> @ult_12_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_12_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
@@ -4943,7 +4990,8 @@ define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_12_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
@@ -5106,8 +5154,8 @@ define <8 x i16> @ult_13_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_13_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
@@ -5261,7 +5309,8 @@ define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_13_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
@@ -5424,8 +5473,8 @@ define <8 x i16> @ult_14_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_14_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
@@ -5579,7 +5628,8 @@ define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ugt_14_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ugt <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
@@ -5742,8 +5792,8 @@ define <8 x i16> @ult_15_v8i16(<8 x i16> %0) {
 ; BITALG-LABEL: ult_15_v8i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
-; BITALG-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %xmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
   %3 = icmp ult <8 x i16> %2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -5795,8 +5845,9 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_1_v4i32:
@@ -5866,8 +5917,9 @@ define <4 x i32> @ult_2_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_2_v4i32:
@@ -6041,8 +6093,9 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_2_v4i32:
@@ -6069,8 +6122,9 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
@@ -6230,8 +6284,9 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_3_v4i32:
@@ -6258,8 +6313,9 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
@@ -6415,8 +6471,9 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_3_v4i32:
@@ -6443,8 +6500,9 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
@@ -6604,8 +6662,9 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_4_v4i32:
@@ -6632,8 +6691,9 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
@@ -6789,8 +6849,9 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_4_v4i32:
@@ -6817,8 +6878,9 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
@@ -6978,8 +7040,9 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_5_v4i32:
@@ -7006,8 +7069,9 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
@@ -7163,8 +7227,9 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_5_v4i32:
@@ -7191,8 +7256,9 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
@@ -7352,8 +7418,9 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_6_v4i32:
@@ -7380,8 +7447,9 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
@@ -7537,8 +7605,9 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_6_v4i32:
@@ -7565,8 +7634,9 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
@@ -7726,8 +7796,9 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_7_v4i32:
@@ -7754,8 +7825,9 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7>
@@ -7911,8 +7983,9 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_7_v4i32:
@@ -7939,8 +8012,9 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7>
@@ -8100,8 +8174,9 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_8_v4i32:
@@ -8128,8 +8203,9 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8>
@@ -8285,8 +8361,9 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_8_v4i32:
@@ -8313,8 +8390,9 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8>
@@ -8474,8 +8552,9 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_9_v4i32:
@@ -8502,8 +8581,9 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
@@ -8659,8 +8739,9 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_9_v4i32:
@@ -8687,8 +8768,9 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
@@ -8848,8 +8930,9 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_10_v4i32:
@@ -8876,8 +8959,9 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
@@ -9033,8 +9117,9 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_10_v4i32:
@@ -9061,8 +9146,9 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
@@ -9222,8 +9308,9 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_11_v4i32:
@@ -9250,8 +9337,9 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11>
@@ -9407,8 +9495,9 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_11_v4i32:
@@ -9435,8 +9524,9 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11>
@@ -9596,8 +9686,9 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_12_v4i32:
@@ -9624,8 +9715,9 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12>
@@ -9781,8 +9873,9 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_12_v4i32:
@@ -9809,8 +9902,9 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12>
@@ -9970,8 +10064,9 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_13_v4i32:
@@ -9998,8 +10093,9 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13>
@@ -10155,8 +10251,9 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_13_v4i32:
@@ -10183,8 +10280,9 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13>
@@ -10344,8 +10442,9 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_14_v4i32:
@@ -10372,8 +10471,9 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14>
@@ -10529,8 +10629,9 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_14_v4i32:
@@ -10557,8 +10658,9 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14>
@@ -10718,8 +10820,9 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_15_v4i32:
@@ -10746,8 +10849,9 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
@@ -10903,8 +11007,9 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_15_v4i32:
@@ -10931,8 +11036,9 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
@@ -11092,8 +11198,9 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_16_v4i32:
@@ -11120,8 +11227,9 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
@@ -11277,8 +11385,9 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_16_v4i32:
@@ -11305,8 +11414,9 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
@@ -11466,8 +11576,9 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_17_v4i32:
@@ -11494,8 +11605,9 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17>
@@ -11651,8 +11763,9 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_17_v4i32:
@@ -11679,8 +11792,9 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17>
@@ -11840,8 +11954,9 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_18_v4i32:
@@ -11868,8 +11983,9 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18>
@@ -12025,8 +12141,9 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_18_v4i32:
@@ -12053,8 +12170,9 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18>
@@ -12214,8 +12332,9 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_19_v4i32:
@@ -12242,8 +12361,9 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19>
@@ -12399,8 +12519,9 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_19_v4i32:
@@ -12427,8 +12548,9 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19>
@@ -12588,8 +12710,9 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_20_v4i32:
@@ -12616,8 +12739,9 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20>
@@ -12773,8 +12897,9 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_20_v4i32:
@@ -12801,8 +12926,9 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20>
@@ -12962,8 +13088,9 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_21_v4i32:
@@ -12990,8 +13117,9 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21>
@@ -13147,8 +13275,9 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_21_v4i32:
@@ -13175,8 +13304,9 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21>
@@ -13336,8 +13466,9 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_22_v4i32:
@@ -13364,8 +13495,9 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22>
@@ -13521,8 +13653,9 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_22_v4i32:
@@ -13549,8 +13682,9 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22>
@@ -13710,8 +13844,9 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_23_v4i32:
@@ -13738,8 +13873,9 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23>
@@ -13895,8 +14031,9 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_23_v4i32:
@@ -13923,8 +14060,9 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23>
@@ -14084,8 +14222,9 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_24_v4i32:
@@ -14112,8 +14251,9 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
@@ -14269,8 +14409,9 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_24_v4i32:
@@ -14297,8 +14438,9 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
@@ -14458,8 +14600,9 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_25_v4i32:
@@ -14486,8 +14629,9 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25>
@@ -14643,8 +14787,9 @@ define <4 x i32> @ugt_25_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_25_v4i32:
@@ -14671,8 +14816,9 @@ define <4 x i32> @ugt_25_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25>
@@ -14832,8 +14978,9 @@ define <4 x i32> @ult_26_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_26_v4i32:
@@ -14860,8 +15007,9 @@ define <4 x i32> @ult_26_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26>
@@ -15017,8 +15165,9 @@ define <4 x i32> @ugt_26_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_26_v4i32:
@@ -15045,8 +15194,9 @@ define <4 x i32> @ugt_26_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26>
@@ -15206,8 +15356,9 @@ define <4 x i32> @ult_27_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_27_v4i32:
@@ -15234,8 +15385,9 @@ define <4 x i32> @ult_27_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27>
@@ -15391,8 +15543,9 @@ define <4 x i32> @ugt_27_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_27_v4i32:
@@ -15419,8 +15572,9 @@ define <4 x i32> @ugt_27_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27>
@@ -15580,8 +15734,9 @@ define <4 x i32> @ult_28_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_28_v4i32:
@@ -15608,8 +15763,9 @@ define <4 x i32> @ult_28_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28>
@@ -15765,8 +15921,9 @@ define <4 x i32> @ugt_28_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_28_v4i32:
@@ -15793,8 +15950,9 @@ define <4 x i32> @ugt_28_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28>
@@ -15954,8 +16112,9 @@ define <4 x i32> @ult_29_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_29_v4i32:
@@ -15982,8 +16141,9 @@ define <4 x i32> @ult_29_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29>
@@ -16139,8 +16299,9 @@ define <4 x i32> @ugt_29_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_29_v4i32:
@@ -16167,8 +16328,9 @@ define <4 x i32> @ugt_29_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29>
@@ -16328,8 +16490,9 @@ define <4 x i32> @ult_30_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_30_v4i32:
@@ -16356,8 +16519,9 @@ define <4 x i32> @ult_30_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
@@ -16513,8 +16677,9 @@ define <4 x i32> @ugt_30_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_30_v4i32:
@@ -16541,8 +16706,9 @@ define <4 x i32> @ugt_30_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
-; BITALG-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ugt <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
@@ -16702,8 +16868,9 @@ define <4 x i32> @ult_31_v4i32(<4 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_31_v4i32:
@@ -16730,8 +16897,9 @@ define <4 x i32> @ult_31_v4i32(<4 x i32> %0) {
 ; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
-; BITALG-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
   %3 = icmp ult <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
@@ -16821,8 +16989,9 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,1]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_1_v2i64:
@@ -16925,8 +17094,9 @@ define <2 x i64> @ult_2_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_2_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_2_v2i64:
@@ -17098,8 +17268,9 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_2_v2i64:
@@ -17117,8 +17288,9 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 2, i64 2>
@@ -17279,8 +17451,9 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_3_v2i64:
@@ -17299,8 +17472,9 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [3,3]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 3, i64 3>
@@ -17454,8 +17628,9 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_3_v2i64:
@@ -17473,8 +17648,9 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [3,3]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 3, i64 3>
@@ -17635,8 +17811,9 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_4_v2i64:
@@ -17655,8 +17832,9 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4,4]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 4, i64 4>
@@ -17810,8 +17988,9 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_4_v2i64:
@@ -17829,8 +18008,9 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4,4]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 4, i64 4>
@@ -17991,8 +18171,9 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_5_v2i64:
@@ -18011,8 +18192,9 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [5,5]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 5, i64 5>
@@ -18166,8 +18348,9 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_5_v2i64:
@@ -18185,8 +18368,9 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [5,5]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 5, i64 5>
@@ -18347,8 +18531,9 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_6_v2i64:
@@ -18367,8 +18552,9 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [6,6]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 6, i64 6>
@@ -18522,8 +18708,9 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_6_v2i64:
@@ -18541,8 +18728,9 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [6,6]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 6, i64 6>
@@ -18703,8 +18891,9 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_7_v2i64:
@@ -18723,8 +18912,9 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [7,7]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 7, i64 7>
@@ -18878,8 +19068,9 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_7_v2i64:
@@ -18897,8 +19088,9 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [7,7]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 7, i64 7>
@@ -19059,8 +19251,9 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [8,8]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_8_v2i64:
@@ -19079,8 +19272,9 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [8,8]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 8, i64 8>
@@ -19234,8 +19428,9 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [8,8]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_8_v2i64:
@@ -19253,8 +19448,9 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [8,8]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 8, i64 8>
@@ -19415,8 +19611,9 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9,9]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_9_v2i64:
@@ -19435,8 +19632,9 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9,9]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 9, i64 9>
@@ -19590,8 +19788,9 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9,9]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_9_v2i64:
@@ -19609,8 +19808,9 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9,9]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 9, i64 9>
@@ -19771,8 +19971,9 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [10,10]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_10_v2i64:
@@ -19791,8 +19992,9 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [10,10]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 10, i64 10>
@@ -19946,8 +20148,9 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [10,10]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_10_v2i64:
@@ -19965,8 +20168,9 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [10,10]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 10, i64 10>
@@ -20127,8 +20331,9 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [11,11]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_11_v2i64:
@@ -20147,8 +20352,9 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [11,11]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 11, i64 11>
@@ -20302,8 +20508,9 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [11,11]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_11_v2i64:
@@ -20321,8 +20528,9 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [11,11]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 11, i64 11>
@@ -20483,8 +20691,9 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [12,12]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_12_v2i64:
@@ -20503,8 +20712,9 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [12,12]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 12, i64 12>
@@ -20658,8 +20868,9 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [12,12]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_12_v2i64:
@@ -20677,8 +20888,9 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [12,12]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 12, i64 12>
@@ -20839,8 +21051,9 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [13,13]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_13_v2i64:
@@ -20859,8 +21072,9 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [13,13]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 13, i64 13>
@@ -21014,8 +21228,9 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [13,13]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_13_v2i64:
@@ -21033,8 +21248,9 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [13,13]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 13, i64 13>
@@ -21195,8 +21411,9 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [14,14]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_14_v2i64:
@@ -21215,8 +21432,9 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [14,14]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 14, i64 14>
@@ -21370,8 +21588,9 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [14,14]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_14_v2i64:
@@ -21389,8 +21608,9 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [14,14]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 14, i64 14>
@@ -21551,8 +21771,9 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [15,15]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_15_v2i64:
@@ -21571,8 +21792,9 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [15,15]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 15, i64 15>
@@ -21726,8 +21948,9 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [15,15]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_15_v2i64:
@@ -21745,8 +21968,9 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [15,15]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 15, i64 15>
@@ -21907,8 +22131,9 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_16_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [16,16]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_16_v2i64:
@@ -21927,8 +22152,9 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [16,16]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 16, i64 16>
@@ -22082,8 +22308,9 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [16,16]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_16_v2i64:
@@ -22101,8 +22328,9 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [16,16]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 16, i64 16>
@@ -22263,8 +22491,9 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_17_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [17,17]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_17_v2i64:
@@ -22283,8 +22512,9 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [17,17]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 17, i64 17>
@@ -22438,8 +22668,9 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [17,17]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_17_v2i64:
@@ -22457,8 +22688,9 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [17,17]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 17, i64 17>
@@ -22619,8 +22851,9 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_18_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18,18]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_18_v2i64:
@@ -22639,8 +22872,9 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18,18]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 18, i64 18>
@@ -22794,8 +23028,9 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18,18]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_18_v2i64:
@@ -22813,8 +23048,9 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18,18]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 18, i64 18>
@@ -22975,8 +23211,9 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_19_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [19,19]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_19_v2i64:
@@ -22995,8 +23232,9 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [19,19]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 19, i64 19>
@@ -23150,8 +23388,9 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [19,19]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_19_v2i64:
@@ -23169,8 +23408,9 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [19,19]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 19, i64 19>
@@ -23331,8 +23571,9 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_20_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [20,20]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_20_v2i64:
@@ -23351,8 +23592,9 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [20,20]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 20, i64 20>
@@ -23506,8 +23748,9 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [20,20]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_20_v2i64:
@@ -23525,8 +23768,9 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [20,20]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 20, i64 20>
@@ -23687,8 +23931,9 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_21_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [21,21]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_21_v2i64:
@@ -23707,8 +23952,9 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [21,21]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 21, i64 21>
@@ -23862,8 +24108,9 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [21,21]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_21_v2i64:
@@ -23881,8 +24128,9 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [21,21]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 21, i64 21>
@@ -24043,8 +24291,9 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_22_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [22,22]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_22_v2i64:
@@ -24063,8 +24312,9 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [22,22]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 22, i64 22>
@@ -24218,8 +24468,9 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [22,22]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_22_v2i64:
@@ -24237,8 +24488,9 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [22,22]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 22, i64 22>
@@ -24399,8 +24651,9 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_23_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [23,23]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_23_v2i64:
@@ -24419,8 +24672,9 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [23,23]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 23, i64 23>
@@ -24574,8 +24828,9 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [23,23]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_23_v2i64:
@@ -24593,8 +24848,9 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [23,23]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 23, i64 23>
@@ -24755,8 +25011,9 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_24_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [24,24]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_24_v2i64:
@@ -24775,8 +25032,9 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [24,24]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 24, i64 24>
@@ -24930,8 +25188,9 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [24,24]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_24_v2i64:
@@ -24949,8 +25208,9 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [24,24]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 24, i64 24>
@@ -25111,8 +25371,9 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_25_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [25,25]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_25_v2i64:
@@ -25131,8 +25392,9 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [25,25]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 25, i64 25>
@@ -25286,8 +25548,9 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [25,25]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_25_v2i64:
@@ -25305,8 +25568,9 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [25,25]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 25, i64 25>
@@ -25467,8 +25731,9 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_26_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [26,26]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_26_v2i64:
@@ -25487,8 +25752,9 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [26,26]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 26, i64 26>
@@ -25642,8 +25908,9 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [26,26]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_26_v2i64:
@@ -25661,8 +25928,9 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [26,26]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 26, i64 26>
@@ -25823,8 +26091,9 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_27_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [27,27]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_27_v2i64:
@@ -25843,8 +26112,9 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [27,27]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 27, i64 27>
@@ -25998,8 +26268,9 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [27,27]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_27_v2i64:
@@ -26017,8 +26288,9 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [27,27]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 27, i64 27>
@@ -26179,8 +26451,9 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_28_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [28,28]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_28_v2i64:
@@ -26199,8 +26472,9 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [28,28]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 28, i64 28>
@@ -26354,8 +26628,9 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [28,28]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_28_v2i64:
@@ -26373,8 +26648,9 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [28,28]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 28, i64 28>
@@ -26535,8 +26811,9 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_29_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [29,29]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_29_v2i64:
@@ -26555,8 +26832,9 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [29,29]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 29, i64 29>
@@ -26710,8 +26988,9 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [29,29]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_29_v2i64:
@@ -26729,8 +27008,9 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [29,29]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 29, i64 29>
@@ -26891,8 +27171,9 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_30_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [30,30]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_30_v2i64:
@@ -26911,8 +27192,9 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [30,30]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 30, i64 30>
@@ -27066,8 +27348,9 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [30,30]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_30_v2i64:
@@ -27085,8 +27368,9 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [30,30]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 30, i64 30>
@@ -27247,8 +27531,9 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_31_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [31,31]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_31_v2i64:
@@ -27267,8 +27552,9 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [31,31]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 31, i64 31>
@@ -27422,8 +27708,9 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_31_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [31,31]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_31_v2i64:
@@ -27441,8 +27728,9 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [31,31]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 31, i64 31>
@@ -27603,8 +27891,9 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_32_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32,32]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_32_v2i64:
@@ -27623,8 +27912,9 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32,32]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 32, i64 32>
@@ -27778,8 +28068,9 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_32_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32,32]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_32_v2i64:
@@ -27797,8 +28088,9 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32,32]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 32, i64 32>
@@ -27959,8 +28251,9 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_33_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [33,33]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_33_v2i64:
@@ -27979,8 +28272,9 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [33,33]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 33, i64 33>
@@ -28134,8 +28428,9 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_33_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [33,33]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_33_v2i64:
@@ -28153,8 +28448,9 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [33,33]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 33, i64 33>
@@ -28315,8 +28611,9 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_34_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [34,34]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_34_v2i64:
@@ -28335,8 +28632,9 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [34,34]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 34, i64 34>
@@ -28490,8 +28788,9 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_34_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [34,34]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_34_v2i64:
@@ -28509,8 +28808,9 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [34,34]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 34, i64 34>
@@ -28671,8 +28971,9 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_35_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [35,35]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_35_v2i64:
@@ -28691,8 +28992,9 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [35,35]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 35, i64 35>
@@ -28846,8 +29148,9 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_35_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [35,35]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_35_v2i64:
@@ -28865,8 +29168,9 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [35,35]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 35, i64 35>
@@ -29027,8 +29331,9 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_36_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [36,36]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_36_v2i64:
@@ -29047,8 +29352,9 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [36,36]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 36, i64 36>
@@ -29202,8 +29508,9 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_36_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [36,36]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_36_v2i64:
@@ -29221,8 +29528,9 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [36,36]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 36, i64 36>
@@ -29383,8 +29691,9 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_37_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [37,37]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_37_v2i64:
@@ -29403,8 +29712,9 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [37,37]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 37, i64 37>
@@ -29558,8 +29868,9 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_37_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [37,37]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_37_v2i64:
@@ -29577,8 +29888,9 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [37,37]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 37, i64 37>
@@ -29739,8 +30051,9 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_38_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [38,38]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_38_v2i64:
@@ -29759,8 +30072,9 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [38,38]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 38, i64 38>
@@ -29914,8 +30228,9 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_38_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [38,38]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_38_v2i64:
@@ -29933,8 +30248,9 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [38,38]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 38, i64 38>
@@ -30095,8 +30411,9 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_39_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [39,39]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_39_v2i64:
@@ -30115,8 +30432,9 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [39,39]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 39, i64 39>
@@ -30270,8 +30588,9 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_39_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [39,39]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_39_v2i64:
@@ -30289,8 +30608,9 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [39,39]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 39, i64 39>
@@ -30451,8 +30771,9 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_40_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [40,40]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_40_v2i64:
@@ -30471,8 +30792,9 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [40,40]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 40, i64 40>
@@ -30626,8 +30948,9 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_40_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [40,40]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_40_v2i64:
@@ -30645,8 +30968,9 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [40,40]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 40, i64 40>
@@ -30807,8 +31131,9 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_41_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [41,41]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_41_v2i64:
@@ -30827,8 +31152,9 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [41,41]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 41, i64 41>
@@ -30982,8 +31308,9 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_41_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [41,41]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_41_v2i64:
@@ -31001,8 +31328,9 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [41,41]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 41, i64 41>
@@ -31163,8 +31491,9 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_42_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_42_v2i64:
@@ -31183,8 +31512,9 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 42, i64 42>
@@ -31338,8 +31668,9 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_42_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_42_v2i64:
@@ -31357,8 +31688,9 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 42, i64 42>
@@ -31519,8 +31851,9 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_43_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [43,43]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_43_v2i64:
@@ -31539,8 +31872,9 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [43,43]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 43, i64 43>
@@ -31694,8 +32028,9 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_43_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [43,43]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_43_v2i64:
@@ -31713,8 +32048,9 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [43,43]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 43, i64 43>
@@ -31875,8 +32211,9 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_44_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [44,44]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_44_v2i64:
@@ -31895,8 +32232,9 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [44,44]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 44, i64 44>
@@ -32050,8 +32388,9 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_44_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [44,44]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_44_v2i64:
@@ -32069,8 +32408,9 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [44,44]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 44, i64 44>
@@ -32231,8 +32571,9 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_45_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [45,45]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_45_v2i64:
@@ -32251,8 +32592,9 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [45,45]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 45, i64 45>
@@ -32406,8 +32748,9 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_45_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [45,45]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_45_v2i64:
@@ -32425,8 +32768,9 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [45,45]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 45, i64 45>
@@ -32587,8 +32931,9 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_46_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [46,46]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_46_v2i64:
@@ -32607,8 +32952,9 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [46,46]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 46, i64 46>
@@ -32762,8 +33108,9 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_46_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [46,46]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_46_v2i64:
@@ -32781,8 +33128,9 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [46,46]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 46, i64 46>
@@ -32943,8 +33291,9 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_47_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [47,47]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_47_v2i64:
@@ -32963,8 +33312,9 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [47,47]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 47, i64 47>
@@ -33118,8 +33468,9 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_47_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [47,47]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_47_v2i64:
@@ -33137,8 +33488,9 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [47,47]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 47, i64 47>
@@ -33299,8 +33651,9 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_48_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [48,48]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_48_v2i64:
@@ -33319,8 +33672,9 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [48,48]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 48, i64 48>
@@ -33474,8 +33828,9 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_48_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [48,48]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_48_v2i64:
@@ -33493,8 +33848,9 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [48,48]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 48, i64 48>
@@ -33655,8 +34011,9 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_49_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [49,49]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_49_v2i64:
@@ -33675,8 +34032,9 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [49,49]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 49, i64 49>
@@ -33830,8 +34188,9 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_49_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [49,49]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_49_v2i64:
@@ -33849,8 +34208,9 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [49,49]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 49, i64 49>
@@ -34011,8 +34371,9 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_50_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [50,50]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_50_v2i64:
@@ -34031,8 +34392,9 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [50,50]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 50, i64 50>
@@ -34186,8 +34548,9 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_50_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [50,50]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_50_v2i64:
@@ -34205,8 +34568,9 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [50,50]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 50, i64 50>
@@ -34367,8 +34731,9 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_51_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [51,51]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_51_v2i64:
@@ -34387,8 +34752,9 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [51,51]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 51, i64 51>
@@ -34542,8 +34908,9 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_51_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [51,51]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_51_v2i64:
@@ -34561,8 +34928,9 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [51,51]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 51, i64 51>
@@ -34723,8 +35091,9 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_52_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [52,52]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_52_v2i64:
@@ -34743,8 +35112,9 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [52,52]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 52, i64 52>
@@ -34898,8 +35268,9 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_52_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [52,52]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_52_v2i64:
@@ -34917,8 +35288,9 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [52,52]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 52, i64 52>
@@ -35079,8 +35451,9 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_53_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [53,53]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_53_v2i64:
@@ -35099,8 +35472,9 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [53,53]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 53, i64 53>
@@ -35254,8 +35628,9 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_53_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [53,53]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_53_v2i64:
@@ -35273,8 +35648,9 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [53,53]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 53, i64 53>
@@ -35435,8 +35811,9 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_54_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [54,54]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_54_v2i64:
@@ -35455,8 +35832,9 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [54,54]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 54, i64 54>
@@ -35610,8 +35988,9 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_54_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [54,54]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_54_v2i64:
@@ -35629,8 +36008,9 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [54,54]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 54, i64 54>
@@ -35791,8 +36171,9 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_55_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [55,55]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_55_v2i64:
@@ -35811,8 +36192,9 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [55,55]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 55, i64 55>
@@ -35966,8 +36348,9 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_55_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [55,55]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_55_v2i64:
@@ -35985,8 +36368,9 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [55,55]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 55, i64 55>
@@ -36147,8 +36531,9 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_56_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [56,56]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_56_v2i64:
@@ -36167,8 +36552,9 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [56,56]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 56, i64 56>
@@ -36322,8 +36708,9 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_56_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [56,56]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_56_v2i64:
@@ -36341,8 +36728,9 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [56,56]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 56, i64 56>
@@ -36503,8 +36891,9 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_57_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [57,57]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_57_v2i64:
@@ -36523,8 +36912,9 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [57,57]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 57, i64 57>
@@ -36678,8 +37068,9 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_57_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [57,57]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_57_v2i64:
@@ -36697,8 +37088,9 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [57,57]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 57, i64 57>
@@ -36859,8 +37251,9 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_58_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [58,58]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_58_v2i64:
@@ -36879,8 +37272,9 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [58,58]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 58, i64 58>
@@ -37034,8 +37428,9 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_58_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [58,58]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_58_v2i64:
@@ -37053,8 +37448,9 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [58,58]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 58, i64 58>
@@ -37215,8 +37611,9 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_59_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [59,59]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_59_v2i64:
@@ -37235,8 +37632,9 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [59,59]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 59, i64 59>
@@ -37390,8 +37788,9 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_59_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [59,59]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_59_v2i64:
@@ -37409,8 +37808,9 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [59,59]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 59, i64 59>
@@ -37571,8 +37971,9 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_60_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [60,60]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_60_v2i64:
@@ -37591,8 +37992,9 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [60,60]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 60, i64 60>
@@ -37746,8 +38148,9 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_60_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [60,60]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_60_v2i64:
@@ -37765,8 +38168,9 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [60,60]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 60, i64 60>
@@ -37927,8 +38331,9 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_61_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [61,61]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_61_v2i64:
@@ -37947,8 +38352,9 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [61,61]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 61, i64 61>
@@ -38102,8 +38508,9 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_61_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [61,61]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_61_v2i64:
@@ -38121,8 +38528,9 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [61,61]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 61, i64 61>
@@ -38283,8 +38691,9 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_62_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [62,62]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_62_v2i64:
@@ -38303,8 +38712,9 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [62,62]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 62, i64 62>
@@ -38458,8 +38868,9 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_62_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [62,62]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_62_v2i64:
@@ -38477,8 +38888,9 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [62,62]
-; BITALG-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ugt <2 x i64> %2, <i64 62, i64 62>
@@ -38639,8 +39051,9 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_63_v2i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [63,63]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_63_v2i64:
@@ -38659,8 +39072,9 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [63,63]
-; BITALG-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
+; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
   %3 = icmp ult <2 x i64> %2, <i64 63, i64 63>

diff --git a/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll
index c5bb1dfe6001..17fb2bfe197d 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll
@@ -65,7 +65,8 @@ define <32 x i8> @ugt_1_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ugt_1_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ugt <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -126,8 +127,8 @@ define <32 x i8> @ult_2_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ult_2_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; BITALG-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ult <32 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
@@ -153,9 +154,11 @@ define <32 x i8> @ugt_2_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -169,7 +172,8 @@ define <32 x i8> @ugt_2_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v32i8:
@@ -182,7 +186,8 @@ define <32 x i8> @ugt_2_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v32i8:
@@ -195,7 +200,8 @@ define <32 x i8> @ugt_2_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_2_v32i8:
@@ -208,7 +214,8 @@ define <32 x i8> @ugt_2_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ugt_2_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ugt <32 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
@@ -234,9 +241,11 @@ define <32 x i8> @ult_3_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpminub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -250,8 +259,8 @@ define <32 x i8> @ult_3_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_3_v32i8:
@@ -264,8 +273,8 @@ define <32 x i8> @ult_3_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v32i8:
@@ -278,8 +287,8 @@ define <32 x i8> @ult_3_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_3_v32i8:
@@ -293,8 +302,8 @@ define <32 x i8> @ult_3_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ult_3_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; BITALG-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ult <32 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -320,9 +329,11 @@ define <32 x i8> @ugt_3_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -336,7 +347,8 @@ define <32 x i8> @ugt_3_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v32i8:
@@ -349,7 +361,8 @@ define <32 x i8> @ugt_3_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v32i8:
@@ -362,7 +375,8 @@ define <32 x i8> @ugt_3_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_3_v32i8:
@@ -375,7 +389,8 @@ define <32 x i8> @ugt_3_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ugt_3_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ugt <32 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -401,9 +416,11 @@ define <32 x i8> @ult_4_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpminub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -417,8 +434,8 @@ define <32 x i8> @ult_4_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_4_v32i8:
@@ -431,8 +448,8 @@ define <32 x i8> @ult_4_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v32i8:
@@ -445,8 +462,8 @@ define <32 x i8> @ult_4_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_4_v32i8:
@@ -460,8 +477,8 @@ define <32 x i8> @ult_4_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ult_4_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; BITALG-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ult <32 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -487,9 +504,11 @@ define <32 x i8> @ugt_4_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -503,7 +522,8 @@ define <32 x i8> @ugt_4_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v32i8:
@@ -516,7 +536,8 @@ define <32 x i8> @ugt_4_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v32i8:
@@ -529,7 +550,8 @@ define <32 x i8> @ugt_4_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_4_v32i8:
@@ -542,7 +564,8 @@ define <32 x i8> @ugt_4_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ugt_4_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ugt <32 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -568,9 +591,11 @@ define <32 x i8> @ult_5_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpminub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -584,8 +609,8 @@ define <32 x i8> @ult_5_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_5_v32i8:
@@ -598,8 +623,8 @@ define <32 x i8> @ult_5_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v32i8:
@@ -612,8 +637,8 @@ define <32 x i8> @ult_5_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_5_v32i8:
@@ -627,8 +652,8 @@ define <32 x i8> @ult_5_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ult_5_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; BITALG-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ult <32 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
@@ -654,9 +679,11 @@ define <32 x i8> @ugt_5_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -670,7 +697,8 @@ define <32 x i8> @ugt_5_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v32i8:
@@ -683,7 +711,8 @@ define <32 x i8> @ugt_5_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v32i8:
@@ -696,7 +725,8 @@ define <32 x i8> @ugt_5_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_5_v32i8:
@@ -709,7 +739,8 @@ define <32 x i8> @ugt_5_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ugt_5_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ugt <32 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
@@ -735,9 +766,11 @@ define <32 x i8> @ult_6_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpminub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -751,8 +784,8 @@ define <32 x i8> @ult_6_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_6_v32i8:
@@ -765,8 +798,8 @@ define <32 x i8> @ult_6_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v32i8:
@@ -779,8 +812,8 @@ define <32 x i8> @ult_6_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_6_v32i8:
@@ -794,8 +827,8 @@ define <32 x i8> @ult_6_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ult_6_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; BITALG-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ult <32 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
@@ -821,9 +854,11 @@ define <32 x i8> @ugt_6_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -837,7 +872,8 @@ define <32 x i8> @ugt_6_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v32i8:
@@ -850,7 +886,8 @@ define <32 x i8> @ugt_6_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v32i8:
@@ -863,7 +900,8 @@ define <32 x i8> @ugt_6_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_6_v32i8:
@@ -876,7 +914,8 @@ define <32 x i8> @ugt_6_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ugt_6_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ugt <32 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
@@ -902,9 +941,11 @@ define <32 x i8> @ult_7_v32i8(<32 x i8> %0) {
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpminub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -918,8 +959,8 @@ define <32 x i8> @ult_7_v32i8(<32 x i8> %0) {
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_7_v32i8:
@@ -932,8 +973,8 @@ define <32 x i8> @ult_7_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v32i8:
@@ -946,8 +987,8 @@ define <32 x i8> @ult_7_v32i8(<32 x i8> %0) {
 ; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_7_v32i8:
@@ -961,8 +1002,8 @@ define <32 x i8> @ult_7_v32i8(<32 x i8> %0) {
 ; BITALG-LABEL: ult_7_v32i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; BITALG-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2b %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0)
   %3 = icmp ult <32 x i8> %2, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -1028,7 +1069,8 @@ define <16 x i16> @ugt_1_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_1_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1089,8 +1131,8 @@ define <16 x i16> @ult_2_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_2_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
@@ -1170,7 +1212,8 @@ define <16 x i16> @ugt_2_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_2_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
@@ -1254,8 +1297,8 @@ define <16 x i16> @ult_3_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_3_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1335,7 +1378,8 @@ define <16 x i16> @ugt_3_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_3_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1419,8 +1463,8 @@ define <16 x i16> @ult_4_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_4_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
@@ -1500,7 +1544,8 @@ define <16 x i16> @ugt_4_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_4_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
@@ -1584,8 +1629,8 @@ define <16 x i16> @ult_5_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_5_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -1665,7 +1710,8 @@ define <16 x i16> @ugt_5_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_5_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -1749,8 +1795,8 @@ define <16 x i16> @ult_6_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_6_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
@@ -1830,7 +1876,8 @@ define <16 x i16> @ugt_6_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_6_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
@@ -1914,8 +1961,8 @@ define <16 x i16> @ult_7_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_7_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
@@ -1995,7 +2042,8 @@ define <16 x i16> @ugt_7_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_7_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
@@ -2079,8 +2127,8 @@ define <16 x i16> @ult_8_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_8_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -2160,7 +2208,8 @@ define <16 x i16> @ugt_8_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_8_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -2244,8 +2293,8 @@ define <16 x i16> @ult_9_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_9_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -2325,7 +2374,8 @@ define <16 x i16> @ugt_9_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_9_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -2409,8 +2459,8 @@ define <16 x i16> @ult_10_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_10_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
@@ -2490,7 +2540,8 @@ define <16 x i16> @ugt_10_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_10_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
@@ -2574,8 +2625,8 @@ define <16 x i16> @ult_11_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_11_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
@@ -2655,7 +2706,8 @@ define <16 x i16> @ugt_11_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_11_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
@@ -2739,8 +2791,8 @@ define <16 x i16> @ult_12_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_12_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
@@ -2820,7 +2872,8 @@ define <16 x i16> @ugt_12_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_12_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
@@ -2904,8 +2957,8 @@ define <16 x i16> @ult_13_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_13_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
@@ -2985,7 +3038,8 @@ define <16 x i16> @ugt_13_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_13_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
@@ -3069,8 +3123,8 @@ define <16 x i16> @ult_14_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_14_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
@@ -3150,7 +3204,8 @@ define <16 x i16> @ugt_14_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ugt_14_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ugt <16 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
@@ -3234,8 +3289,8 @@ define <16 x i16> @ult_15_v16i16(<16 x i16> %0) {
 ; BITALG-LABEL: ult_15_v16i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
-; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; BITALG-NEXT:    vpmovm2w %k0, %ymm0
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0)
   %3 = icmp ult <16 x i16> %2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -3281,8 +3336,9 @@ define <8 x i32> @ugt_1_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_1_v8i32:
@@ -3346,8 +3402,9 @@ define <8 x i32> @ult_2_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_2_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_2_v8i32:
@@ -3439,8 +3496,9 @@ define <8 x i32> @ugt_2_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_2_v8i32:
@@ -3466,8 +3524,9 @@ define <8 x i32> @ugt_2_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -3541,8 +3600,9 @@ define <8 x i32> @ult_3_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_3_v8i32:
@@ -3568,8 +3628,9 @@ define <8 x i32> @ult_3_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -3643,8 +3704,9 @@ define <8 x i32> @ugt_3_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_3_v8i32:
@@ -3670,8 +3732,9 @@ define <8 x i32> @ugt_3_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -3745,8 +3808,9 @@ define <8 x i32> @ult_4_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_4_v8i32:
@@ -3772,8 +3836,9 @@ define <8 x i32> @ult_4_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -3847,8 +3912,9 @@ define <8 x i32> @ugt_4_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_4_v8i32:
@@ -3874,8 +3940,9 @@ define <8 x i32> @ugt_4_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -3949,8 +4016,9 @@ define <8 x i32> @ult_5_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_5_v8i32:
@@ -3976,8 +4044,9 @@ define <8 x i32> @ult_5_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -4051,8 +4120,9 @@ define <8 x i32> @ugt_5_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_5_v8i32:
@@ -4078,8 +4148,9 @@ define <8 x i32> @ugt_5_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -4153,8 +4224,9 @@ define <8 x i32> @ult_6_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_6_v8i32:
@@ -4180,8 +4252,9 @@ define <8 x i32> @ult_6_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
@@ -4255,8 +4328,9 @@ define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_6_v8i32:
@@ -4282,8 +4356,9 @@ define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
@@ -4357,8 +4432,9 @@ define <8 x i32> @ult_7_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_7_v8i32:
@@ -4384,8 +4460,9 @@ define <8 x i32> @ult_7_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
@@ -4459,8 +4536,9 @@ define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_7_v8i32:
@@ -4486,8 +4564,9 @@ define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
@@ -4561,8 +4640,9 @@ define <8 x i32> @ult_8_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_8_v8i32:
@@ -4588,8 +4668,9 @@ define <8 x i32> @ult_8_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
@@ -4663,8 +4744,9 @@ define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_8_v8i32:
@@ -4690,8 +4772,9 @@ define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
@@ -4765,8 +4848,9 @@ define <8 x i32> @ult_9_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_9_v8i32:
@@ -4792,8 +4876,9 @@ define <8 x i32> @ult_9_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
@@ -4867,8 +4952,9 @@ define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_9_v8i32:
@@ -4894,8 +4980,9 @@ define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
@@ -4969,8 +5056,9 @@ define <8 x i32> @ult_10_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_10_v8i32:
@@ -4996,8 +5084,9 @@ define <8 x i32> @ult_10_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
@@ -5071,8 +5160,9 @@ define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_10_v8i32:
@@ -5098,8 +5188,9 @@ define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
@@ -5173,8 +5264,9 @@ define <8 x i32> @ult_11_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_11_v8i32:
@@ -5200,8 +5292,9 @@ define <8 x i32> @ult_11_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
@@ -5275,8 +5368,9 @@ define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_11_v8i32:
@@ -5302,8 +5396,9 @@ define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
@@ -5377,8 +5472,9 @@ define <8 x i32> @ult_12_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_12_v8i32:
@@ -5404,8 +5500,9 @@ define <8 x i32> @ult_12_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
@@ -5479,8 +5576,9 @@ define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_12_v8i32:
@@ -5506,8 +5604,9 @@ define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
@@ -5581,8 +5680,9 @@ define <8 x i32> @ult_13_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_13_v8i32:
@@ -5608,8 +5708,9 @@ define <8 x i32> @ult_13_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
@@ -5683,8 +5784,9 @@ define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_13_v8i32:
@@ -5710,8 +5812,9 @@ define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
@@ -5785,8 +5888,9 @@ define <8 x i32> @ult_14_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_14_v8i32:
@@ -5812,8 +5916,9 @@ define <8 x i32> @ult_14_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
@@ -5887,8 +5992,9 @@ define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_14_v8i32:
@@ -5914,8 +6020,9 @@ define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
@@ -5989,8 +6096,9 @@ define <8 x i32> @ult_15_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_15_v8i32:
@@ -6016,8 +6124,9 @@ define <8 x i32> @ult_15_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
@@ -6091,8 +6200,9 @@ define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_15_v8i32:
@@ -6118,8 +6228,9 @@ define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
@@ -6193,8 +6304,9 @@ define <8 x i32> @ult_16_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_16_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_16_v8i32:
@@ -6220,8 +6332,9 @@ define <8 x i32> @ult_16_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -6295,8 +6408,9 @@ define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_16_v8i32:
@@ -6322,8 +6436,9 @@ define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -6397,8 +6512,9 @@ define <8 x i32> @ult_17_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_17_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_17_v8i32:
@@ -6424,8 +6540,9 @@ define <8 x i32> @ult_17_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
@@ -6499,8 +6616,9 @@ define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_17_v8i32:
@@ -6526,8 +6644,9 @@ define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
@@ -6601,8 +6720,9 @@ define <8 x i32> @ult_18_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_18_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_18_v8i32:
@@ -6628,8 +6748,9 @@ define <8 x i32> @ult_18_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18>
@@ -6703,8 +6824,9 @@ define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_18_v8i32:
@@ -6730,8 +6852,9 @@ define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18>
@@ -6805,8 +6928,9 @@ define <8 x i32> @ult_19_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_19_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_19_v8i32:
@@ -6832,8 +6956,9 @@ define <8 x i32> @ult_19_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19>
@@ -6907,8 +7032,9 @@ define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_19_v8i32:
@@ -6934,8 +7060,9 @@ define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19>
@@ -7009,8 +7136,9 @@ define <8 x i32> @ult_20_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_20_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_20_v8i32:
@@ -7036,8 +7164,9 @@ define <8 x i32> @ult_20_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20>
@@ -7111,8 +7240,9 @@ define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_20_v8i32:
@@ -7138,8 +7268,9 @@ define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20>
@@ -7213,8 +7344,9 @@ define <8 x i32> @ult_21_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_21_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_21_v8i32:
@@ -7240,8 +7372,9 @@ define <8 x i32> @ult_21_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21>
@@ -7315,8 +7448,9 @@ define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_21_v8i32:
@@ -7342,8 +7476,9 @@ define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21>
@@ -7417,8 +7552,9 @@ define <8 x i32> @ult_22_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_22_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_22_v8i32:
@@ -7444,8 +7580,9 @@ define <8 x i32> @ult_22_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>
@@ -7519,8 +7656,9 @@ define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_22_v8i32:
@@ -7546,8 +7684,9 @@ define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>
@@ -7621,8 +7760,9 @@ define <8 x i32> @ult_23_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_23_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_23_v8i32:
@@ -7648,8 +7788,9 @@ define <8 x i32> @ult_23_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
@@ -7723,8 +7864,9 @@ define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_23_v8i32:
@@ -7750,8 +7892,9 @@ define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
@@ -7825,8 +7968,9 @@ define <8 x i32> @ult_24_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_24_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_24_v8i32:
@@ -7852,8 +7996,9 @@ define <8 x i32> @ult_24_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
@@ -7927,8 +8072,9 @@ define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_24_v8i32:
@@ -7954,8 +8100,9 @@ define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
@@ -8029,8 +8176,9 @@ define <8 x i32> @ult_25_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_25_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_25_v8i32:
@@ -8056,8 +8204,9 @@ define <8 x i32> @ult_25_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
@@ -8131,8 +8280,9 @@ define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_25_v8i32:
@@ -8158,8 +8308,9 @@ define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
@@ -8233,8 +8384,9 @@ define <8 x i32> @ult_26_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_26_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_26_v8i32:
@@ -8260,8 +8412,9 @@ define <8 x i32> @ult_26_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26>
@@ -8335,8 +8488,9 @@ define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_26_v8i32:
@@ -8362,8 +8516,9 @@ define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26>
@@ -8437,8 +8592,9 @@ define <8 x i32> @ult_27_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_27_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_27_v8i32:
@@ -8464,8 +8620,9 @@ define <8 x i32> @ult_27_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27>
@@ -8539,8 +8696,9 @@ define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_27_v8i32:
@@ -8566,8 +8724,9 @@ define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27>
@@ -8641,8 +8800,9 @@ define <8 x i32> @ult_28_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_28_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_28_v8i32:
@@ -8668,8 +8828,9 @@ define <8 x i32> @ult_28_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
@@ -8743,8 +8904,9 @@ define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_28_v8i32:
@@ -8770,8 +8932,9 @@ define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
@@ -8845,8 +9008,9 @@ define <8 x i32> @ult_29_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_29_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_29_v8i32:
@@ -8872,8 +9036,9 @@ define <8 x i32> @ult_29_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29>
@@ -8947,8 +9112,9 @@ define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_29_v8i32:
@@ -8974,8 +9140,9 @@ define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29>
@@ -9049,8 +9216,9 @@ define <8 x i32> @ult_30_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_30_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_30_v8i32:
@@ -9076,8 +9244,9 @@ define <8 x i32> @ult_30_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30>
@@ -9151,8 +9320,9 @@ define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_30_v8i32:
@@ -9178,8 +9348,9 @@ define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30]
-; BITALG-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ugt <8 x i32> %2, <i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30>
@@ -9253,8 +9424,9 @@ define <8 x i32> @ult_31_v8i32(<8 x i32> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_31_v8i32:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_31_v8i32:
@@ -9280,8 +9452,9 @@ define <8 x i32> @ult_31_v8i32(<8 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31]
-; BITALG-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0)
   %3 = icmp ult <8 x i32> %2, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -9327,8 +9500,9 @@ define <4 x i64> @ugt_1_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_1_v4i64:
@@ -9392,8 +9566,9 @@ define <4 x i64> @ult_2_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_2_v4i64:
@@ -9473,8 +9648,9 @@ define <4 x i64> @ugt_2_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_2_v4i64:
@@ -9492,8 +9668,9 @@ define <4 x i64> @ugt_2_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 2, i64 2, i64 2, i64 2>
@@ -9555,8 +9732,9 @@ define <4 x i64> @ult_3_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_3_v4i64:
@@ -9574,8 +9752,9 @@ define <4 x i64> @ult_3_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 3, i64 3, i64 3, i64 3>
@@ -9637,8 +9816,9 @@ define <4 x i64> @ugt_3_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_3_v4i64:
@@ -9656,8 +9836,9 @@ define <4 x i64> @ugt_3_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 3, i64 3, i64 3, i64 3>
@@ -9719,8 +9900,9 @@ define <4 x i64> @ult_4_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_4_v4i64:
@@ -9738,8 +9920,9 @@ define <4 x i64> @ult_4_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 4, i64 4, i64 4, i64 4>
@@ -9801,8 +9984,9 @@ define <4 x i64> @ugt_4_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_4_v4i64:
@@ -9820,8 +10004,9 @@ define <4 x i64> @ugt_4_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 4, i64 4, i64 4, i64 4>
@@ -9883,8 +10068,9 @@ define <4 x i64> @ult_5_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_5_v4i64:
@@ -9902,8 +10088,9 @@ define <4 x i64> @ult_5_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 5, i64 5, i64 5, i64 5>
@@ -9965,8 +10152,9 @@ define <4 x i64> @ugt_5_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_5_v4i64:
@@ -9984,8 +10172,9 @@ define <4 x i64> @ugt_5_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 5, i64 5, i64 5, i64 5>
@@ -10047,8 +10236,9 @@ define <4 x i64> @ult_6_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_6_v4i64:
@@ -10066,8 +10256,9 @@ define <4 x i64> @ult_6_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 6, i64 6, i64 6, i64 6>
@@ -10129,8 +10320,9 @@ define <4 x i64> @ugt_6_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_6_v4i64:
@@ -10148,8 +10340,9 @@ define <4 x i64> @ugt_6_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 6, i64 6, i64 6, i64 6>
@@ -10211,8 +10404,9 @@ define <4 x i64> @ult_7_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_7_v4i64:
@@ -10230,8 +10424,9 @@ define <4 x i64> @ult_7_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 7, i64 7, i64 7, i64 7>
@@ -10293,8 +10488,9 @@ define <4 x i64> @ugt_7_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_7_v4i64:
@@ -10312,8 +10508,9 @@ define <4 x i64> @ugt_7_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 7, i64 7, i64 7, i64 7>
@@ -10375,8 +10572,9 @@ define <4 x i64> @ult_8_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_8_v4i64:
@@ -10394,8 +10592,9 @@ define <4 x i64> @ult_8_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 8, i64 8, i64 8, i64 8>
@@ -10457,8 +10656,9 @@ define <4 x i64> @ugt_8_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_8_v4i64:
@@ -10476,8 +10676,9 @@ define <4 x i64> @ugt_8_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 8, i64 8, i64 8, i64 8>
@@ -10539,8 +10740,9 @@ define <4 x i64> @ult_9_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_9_v4i64:
@@ -10558,8 +10760,9 @@ define <4 x i64> @ult_9_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 9, i64 9, i64 9, i64 9>
@@ -10621,8 +10824,9 @@ define <4 x i64> @ugt_9_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_9_v4i64:
@@ -10640,8 +10844,9 @@ define <4 x i64> @ugt_9_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 9, i64 9, i64 9, i64 9>
@@ -10703,8 +10908,9 @@ define <4 x i64> @ult_10_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_10_v4i64:
@@ -10722,8 +10928,9 @@ define <4 x i64> @ult_10_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 10, i64 10, i64 10, i64 10>
@@ -10785,8 +10992,9 @@ define <4 x i64> @ugt_10_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_10_v4i64:
@@ -10804,8 +11012,9 @@ define <4 x i64> @ugt_10_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 10, i64 10, i64 10, i64 10>
@@ -10867,8 +11076,9 @@ define <4 x i64> @ult_11_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_11_v4i64:
@@ -10886,8 +11096,9 @@ define <4 x i64> @ult_11_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 11, i64 11, i64 11, i64 11>
@@ -10949,8 +11160,9 @@ define <4 x i64> @ugt_11_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_11_v4i64:
@@ -10968,8 +11180,9 @@ define <4 x i64> @ugt_11_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 11, i64 11, i64 11, i64 11>
@@ -11031,8 +11244,9 @@ define <4 x i64> @ult_12_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_12_v4i64:
@@ -11050,8 +11264,9 @@ define <4 x i64> @ult_12_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 12, i64 12, i64 12, i64 12>
@@ -11113,8 +11328,9 @@ define <4 x i64> @ugt_12_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_12_v4i64:
@@ -11132,8 +11348,9 @@ define <4 x i64> @ugt_12_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 12, i64 12, i64 12, i64 12>
@@ -11195,8 +11412,9 @@ define <4 x i64> @ult_13_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_13_v4i64:
@@ -11214,8 +11432,9 @@ define <4 x i64> @ult_13_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 13, i64 13, i64 13, i64 13>
@@ -11277,8 +11496,9 @@ define <4 x i64> @ugt_13_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_13_v4i64:
@@ -11296,8 +11516,9 @@ define <4 x i64> @ugt_13_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 13, i64 13, i64 13, i64 13>
@@ -11359,8 +11580,9 @@ define <4 x i64> @ult_14_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_14_v4i64:
@@ -11378,8 +11600,9 @@ define <4 x i64> @ult_14_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 14, i64 14, i64 14, i64 14>
@@ -11441,8 +11664,9 @@ define <4 x i64> @ugt_14_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_14_v4i64:
@@ -11460,8 +11684,9 @@ define <4 x i64> @ugt_14_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 14, i64 14, i64 14, i64 14>
@@ -11523,8 +11748,9 @@ define <4 x i64> @ult_15_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_15_v4i64:
@@ -11542,8 +11768,9 @@ define <4 x i64> @ult_15_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 15, i64 15, i64 15, i64 15>
@@ -11605,8 +11832,9 @@ define <4 x i64> @ugt_15_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_15_v4i64:
@@ -11624,8 +11852,9 @@ define <4 x i64> @ugt_15_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 15, i64 15, i64 15, i64 15>
@@ -11687,8 +11916,9 @@ define <4 x i64> @ult_16_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_16_v4i64:
@@ -11706,8 +11936,9 @@ define <4 x i64> @ult_16_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 16, i64 16, i64 16, i64 16>
@@ -11769,8 +12000,9 @@ define <4 x i64> @ugt_16_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_16_v4i64:
@@ -11788,8 +12020,9 @@ define <4 x i64> @ugt_16_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 16, i64 16, i64 16, i64 16>
@@ -11851,8 +12084,9 @@ define <4 x i64> @ult_17_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_17_v4i64:
@@ -11870,8 +12104,9 @@ define <4 x i64> @ult_17_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 17, i64 17, i64 17, i64 17>
@@ -11933,8 +12168,9 @@ define <4 x i64> @ugt_17_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_17_v4i64:
@@ -11952,8 +12188,9 @@ define <4 x i64> @ugt_17_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 17, i64 17, i64 17, i64 17>
@@ -12015,8 +12252,9 @@ define <4 x i64> @ult_18_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_18_v4i64:
@@ -12034,8 +12272,9 @@ define <4 x i64> @ult_18_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 18, i64 18, i64 18, i64 18>
@@ -12097,8 +12336,9 @@ define <4 x i64> @ugt_18_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_18_v4i64:
@@ -12116,8 +12356,9 @@ define <4 x i64> @ugt_18_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 18, i64 18, i64 18, i64 18>
@@ -12179,8 +12420,9 @@ define <4 x i64> @ult_19_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_19_v4i64:
@@ -12198,8 +12440,9 @@ define <4 x i64> @ult_19_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 19, i64 19, i64 19, i64 19>
@@ -12261,8 +12504,9 @@ define <4 x i64> @ugt_19_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_19_v4i64:
@@ -12280,8 +12524,9 @@ define <4 x i64> @ugt_19_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 19, i64 19, i64 19, i64 19>
@@ -12343,8 +12588,9 @@ define <4 x i64> @ult_20_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_20_v4i64:
@@ -12362,8 +12608,9 @@ define <4 x i64> @ult_20_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 20, i64 20, i64 20, i64 20>
@@ -12425,8 +12672,9 @@ define <4 x i64> @ugt_20_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_20_v4i64:
@@ -12444,8 +12692,9 @@ define <4 x i64> @ugt_20_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 20, i64 20, i64 20, i64 20>
@@ -12507,8 +12756,9 @@ define <4 x i64> @ult_21_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_21_v4i64:
@@ -12526,8 +12776,9 @@ define <4 x i64> @ult_21_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 21, i64 21, i64 21, i64 21>
@@ -12589,8 +12840,9 @@ define <4 x i64> @ugt_21_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_21_v4i64:
@@ -12608,8 +12860,9 @@ define <4 x i64> @ugt_21_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 21, i64 21, i64 21, i64 21>
@@ -12671,8 +12924,9 @@ define <4 x i64> @ult_22_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_22_v4i64:
@@ -12690,8 +12944,9 @@ define <4 x i64> @ult_22_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 22, i64 22, i64 22, i64 22>
@@ -12753,8 +13008,9 @@ define <4 x i64> @ugt_22_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_22_v4i64:
@@ -12772,8 +13028,9 @@ define <4 x i64> @ugt_22_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 22, i64 22, i64 22, i64 22>
@@ -12835,8 +13092,9 @@ define <4 x i64> @ult_23_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_23_v4i64:
@@ -12854,8 +13112,9 @@ define <4 x i64> @ult_23_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 23, i64 23, i64 23, i64 23>
@@ -12917,8 +13176,9 @@ define <4 x i64> @ugt_23_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_23_v4i64:
@@ -12936,8 +13196,9 @@ define <4 x i64> @ugt_23_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 23, i64 23, i64 23, i64 23>
@@ -12999,8 +13260,9 @@ define <4 x i64> @ult_24_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_24_v4i64:
@@ -13018,8 +13280,9 @@ define <4 x i64> @ult_24_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 24, i64 24, i64 24, i64 24>
@@ -13081,8 +13344,9 @@ define <4 x i64> @ugt_24_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_24_v4i64:
@@ -13100,8 +13364,9 @@ define <4 x i64> @ugt_24_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 24, i64 24, i64 24, i64 24>
@@ -13163,8 +13428,9 @@ define <4 x i64> @ult_25_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_25_v4i64:
@@ -13182,8 +13448,9 @@ define <4 x i64> @ult_25_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 25, i64 25, i64 25, i64 25>
@@ -13245,8 +13512,9 @@ define <4 x i64> @ugt_25_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_25_v4i64:
@@ -13264,8 +13532,9 @@ define <4 x i64> @ugt_25_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 25, i64 25, i64 25, i64 25>
@@ -13327,8 +13596,9 @@ define <4 x i64> @ult_26_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_26_v4i64:
@@ -13346,8 +13616,9 @@ define <4 x i64> @ult_26_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 26, i64 26, i64 26, i64 26>
@@ -13409,8 +13680,9 @@ define <4 x i64> @ugt_26_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_26_v4i64:
@@ -13428,8 +13700,9 @@ define <4 x i64> @ugt_26_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 26, i64 26, i64 26, i64 26>
@@ -13491,8 +13764,9 @@ define <4 x i64> @ult_27_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_27_v4i64:
@@ -13510,8 +13784,9 @@ define <4 x i64> @ult_27_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 27, i64 27, i64 27, i64 27>
@@ -13573,8 +13848,9 @@ define <4 x i64> @ugt_27_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_27_v4i64:
@@ -13592,8 +13868,9 @@ define <4 x i64> @ugt_27_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 27, i64 27, i64 27, i64 27>
@@ -13655,8 +13932,9 @@ define <4 x i64> @ult_28_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_28_v4i64:
@@ -13674,8 +13952,9 @@ define <4 x i64> @ult_28_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 28, i64 28, i64 28, i64 28>
@@ -13737,8 +14016,9 @@ define <4 x i64> @ugt_28_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_28_v4i64:
@@ -13756,8 +14036,9 @@ define <4 x i64> @ugt_28_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 28, i64 28, i64 28, i64 28>
@@ -13819,8 +14100,9 @@ define <4 x i64> @ult_29_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_29_v4i64:
@@ -13838,8 +14120,9 @@ define <4 x i64> @ult_29_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 29, i64 29, i64 29, i64 29>
@@ -13901,8 +14184,9 @@ define <4 x i64> @ugt_29_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_29_v4i64:
@@ -13920,8 +14204,9 @@ define <4 x i64> @ugt_29_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 29, i64 29, i64 29, i64 29>
@@ -13983,8 +14268,9 @@ define <4 x i64> @ult_30_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_30_v4i64:
@@ -14002,8 +14288,9 @@ define <4 x i64> @ult_30_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 30, i64 30, i64 30, i64 30>
@@ -14065,8 +14352,9 @@ define <4 x i64> @ugt_30_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_30_v4i64:
@@ -14084,8 +14372,9 @@ define <4 x i64> @ugt_30_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 30, i64 30, i64 30, i64 30>
@@ -14147,8 +14436,9 @@ define <4 x i64> @ult_31_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_31_v4i64:
@@ -14166,8 +14456,9 @@ define <4 x i64> @ult_31_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 31, i64 31, i64 31, i64 31>
@@ -14229,8 +14520,9 @@ define <4 x i64> @ugt_31_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_31_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_31_v4i64:
@@ -14248,8 +14540,9 @@ define <4 x i64> @ugt_31_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 31, i64 31, i64 31, i64 31>
@@ -14311,8 +14604,9 @@ define <4 x i64> @ult_32_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_32_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_32_v4i64:
@@ -14330,8 +14624,9 @@ define <4 x i64> @ult_32_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 32, i64 32, i64 32, i64 32>
@@ -14393,8 +14688,9 @@ define <4 x i64> @ugt_32_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_32_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_32_v4i64:
@@ -14412,8 +14708,9 @@ define <4 x i64> @ugt_32_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 32, i64 32, i64 32, i64 32>
@@ -14475,8 +14772,9 @@ define <4 x i64> @ult_33_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_33_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_33_v4i64:
@@ -14494,8 +14792,9 @@ define <4 x i64> @ult_33_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 33, i64 33, i64 33, i64 33>
@@ -14557,8 +14856,9 @@ define <4 x i64> @ugt_33_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_33_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_33_v4i64:
@@ -14576,8 +14876,9 @@ define <4 x i64> @ugt_33_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 33, i64 33, i64 33, i64 33>
@@ -14639,8 +14940,9 @@ define <4 x i64> @ult_34_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_34_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_34_v4i64:
@@ -14658,8 +14960,9 @@ define <4 x i64> @ult_34_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 34, i64 34, i64 34, i64 34>
@@ -14721,8 +15024,9 @@ define <4 x i64> @ugt_34_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_34_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_34_v4i64:
@@ -14740,8 +15044,9 @@ define <4 x i64> @ugt_34_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 34, i64 34, i64 34, i64 34>
@@ -14803,8 +15108,9 @@ define <4 x i64> @ult_35_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_35_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_35_v4i64:
@@ -14822,8 +15128,9 @@ define <4 x i64> @ult_35_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 35, i64 35, i64 35, i64 35>
@@ -14885,8 +15192,9 @@ define <4 x i64> @ugt_35_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_35_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_35_v4i64:
@@ -14904,8 +15212,9 @@ define <4 x i64> @ugt_35_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 35, i64 35, i64 35, i64 35>
@@ -14967,8 +15276,9 @@ define <4 x i64> @ult_36_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_36_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_36_v4i64:
@@ -14986,8 +15296,9 @@ define <4 x i64> @ult_36_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 36, i64 36, i64 36, i64 36>
@@ -15049,8 +15360,9 @@ define <4 x i64> @ugt_36_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_36_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_36_v4i64:
@@ -15068,8 +15380,9 @@ define <4 x i64> @ugt_36_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 36, i64 36, i64 36, i64 36>
@@ -15131,8 +15444,9 @@ define <4 x i64> @ult_37_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_37_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_37_v4i64:
@@ -15150,8 +15464,9 @@ define <4 x i64> @ult_37_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 37, i64 37, i64 37, i64 37>
@@ -15213,8 +15528,9 @@ define <4 x i64> @ugt_37_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_37_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_37_v4i64:
@@ -15232,8 +15548,9 @@ define <4 x i64> @ugt_37_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 37, i64 37, i64 37, i64 37>
@@ -15295,8 +15612,9 @@ define <4 x i64> @ult_38_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_38_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_38_v4i64:
@@ -15314,8 +15632,9 @@ define <4 x i64> @ult_38_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 38, i64 38, i64 38, i64 38>
@@ -15377,8 +15696,9 @@ define <4 x i64> @ugt_38_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_38_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_38_v4i64:
@@ -15396,8 +15716,9 @@ define <4 x i64> @ugt_38_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 38, i64 38, i64 38, i64 38>
@@ -15459,8 +15780,9 @@ define <4 x i64> @ult_39_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_39_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_39_v4i64:
@@ -15478,8 +15800,9 @@ define <4 x i64> @ult_39_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 39, i64 39, i64 39, i64 39>
@@ -15541,8 +15864,9 @@ define <4 x i64> @ugt_39_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_39_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_39_v4i64:
@@ -15560,8 +15884,9 @@ define <4 x i64> @ugt_39_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 39, i64 39, i64 39, i64 39>
@@ -15623,8 +15948,9 @@ define <4 x i64> @ult_40_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_40_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_40_v4i64:
@@ -15642,8 +15968,9 @@ define <4 x i64> @ult_40_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 40, i64 40, i64 40, i64 40>
@@ -15705,8 +16032,9 @@ define <4 x i64> @ugt_40_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_40_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_40_v4i64:
@@ -15724,8 +16052,9 @@ define <4 x i64> @ugt_40_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 40, i64 40, i64 40, i64 40>
@@ -15787,8 +16116,9 @@ define <4 x i64> @ult_41_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_41_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_41_v4i64:
@@ -15806,8 +16136,9 @@ define <4 x i64> @ult_41_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 41, i64 41, i64 41, i64 41>
@@ -15869,8 +16200,9 @@ define <4 x i64> @ugt_41_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_41_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_41_v4i64:
@@ -15888,8 +16220,9 @@ define <4 x i64> @ugt_41_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 41, i64 41, i64 41, i64 41>
@@ -15951,8 +16284,9 @@ define <4 x i64> @ult_42_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_42_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_42_v4i64:
@@ -15970,8 +16304,9 @@ define <4 x i64> @ult_42_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 42, i64 42, i64 42, i64 42>
@@ -16033,8 +16368,9 @@ define <4 x i64> @ugt_42_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_42_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_42_v4i64:
@@ -16052,8 +16388,9 @@ define <4 x i64> @ugt_42_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 42, i64 42, i64 42, i64 42>
@@ -16115,8 +16452,9 @@ define <4 x i64> @ult_43_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_43_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_43_v4i64:
@@ -16134,8 +16472,9 @@ define <4 x i64> @ult_43_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 43, i64 43, i64 43, i64 43>
@@ -16197,8 +16536,9 @@ define <4 x i64> @ugt_43_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_43_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_43_v4i64:
@@ -16216,8 +16556,9 @@ define <4 x i64> @ugt_43_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 43, i64 43, i64 43, i64 43>
@@ -16279,8 +16620,9 @@ define <4 x i64> @ult_44_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_44_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_44_v4i64:
@@ -16298,8 +16640,9 @@ define <4 x i64> @ult_44_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 44, i64 44, i64 44, i64 44>
@@ -16361,8 +16704,9 @@ define <4 x i64> @ugt_44_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_44_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_44_v4i64:
@@ -16380,8 +16724,9 @@ define <4 x i64> @ugt_44_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 44, i64 44, i64 44, i64 44>
@@ -16443,8 +16788,9 @@ define <4 x i64> @ult_45_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_45_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_45_v4i64:
@@ -16462,8 +16808,9 @@ define <4 x i64> @ult_45_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 45, i64 45, i64 45, i64 45>
@@ -16525,8 +16872,9 @@ define <4 x i64> @ugt_45_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_45_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_45_v4i64:
@@ -16544,8 +16892,9 @@ define <4 x i64> @ugt_45_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 45, i64 45, i64 45, i64 45>
@@ -16607,8 +16956,9 @@ define <4 x i64> @ult_46_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_46_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_46_v4i64:
@@ -16626,8 +16976,9 @@ define <4 x i64> @ult_46_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 46, i64 46, i64 46, i64 46>
@@ -16689,8 +17040,9 @@ define <4 x i64> @ugt_46_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_46_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_46_v4i64:
@@ -16708,8 +17060,9 @@ define <4 x i64> @ugt_46_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 46, i64 46, i64 46, i64 46>
@@ -16771,8 +17124,9 @@ define <4 x i64> @ult_47_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_47_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_47_v4i64:
@@ -16790,8 +17144,9 @@ define <4 x i64> @ult_47_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 47, i64 47, i64 47, i64 47>
@@ -16853,8 +17208,9 @@ define <4 x i64> @ugt_47_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_47_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_47_v4i64:
@@ -16872,8 +17228,9 @@ define <4 x i64> @ugt_47_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 47, i64 47, i64 47, i64 47>
@@ -16935,8 +17292,9 @@ define <4 x i64> @ult_48_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_48_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_48_v4i64:
@@ -16954,8 +17312,9 @@ define <4 x i64> @ult_48_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 48, i64 48, i64 48, i64 48>
@@ -17017,8 +17376,9 @@ define <4 x i64> @ugt_48_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_48_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_48_v4i64:
@@ -17036,8 +17396,9 @@ define <4 x i64> @ugt_48_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 48, i64 48, i64 48, i64 48>
@@ -17099,8 +17460,9 @@ define <4 x i64> @ult_49_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_49_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_49_v4i64:
@@ -17118,8 +17480,9 @@ define <4 x i64> @ult_49_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 49, i64 49, i64 49, i64 49>
@@ -17181,8 +17544,9 @@ define <4 x i64> @ugt_49_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_49_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_49_v4i64:
@@ -17200,8 +17564,9 @@ define <4 x i64> @ugt_49_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 49, i64 49, i64 49, i64 49>
@@ -17263,8 +17628,9 @@ define <4 x i64> @ult_50_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_50_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_50_v4i64:
@@ -17282,8 +17648,9 @@ define <4 x i64> @ult_50_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 50, i64 50, i64 50, i64 50>
@@ -17345,8 +17712,9 @@ define <4 x i64> @ugt_50_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_50_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_50_v4i64:
@@ -17364,8 +17732,9 @@ define <4 x i64> @ugt_50_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 50, i64 50, i64 50, i64 50>
@@ -17427,8 +17796,9 @@ define <4 x i64> @ult_51_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_51_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_51_v4i64:
@@ -17446,8 +17816,9 @@ define <4 x i64> @ult_51_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 51, i64 51, i64 51, i64 51>
@@ -17509,8 +17880,9 @@ define <4 x i64> @ugt_51_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_51_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_51_v4i64:
@@ -17528,8 +17900,9 @@ define <4 x i64> @ugt_51_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 51, i64 51, i64 51, i64 51>
@@ -17591,8 +17964,9 @@ define <4 x i64> @ult_52_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_52_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_52_v4i64:
@@ -17610,8 +17984,9 @@ define <4 x i64> @ult_52_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 52, i64 52, i64 52, i64 52>
@@ -17673,8 +18048,9 @@ define <4 x i64> @ugt_52_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_52_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_52_v4i64:
@@ -17692,8 +18068,9 @@ define <4 x i64> @ugt_52_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 52, i64 52, i64 52, i64 52>
@@ -17755,8 +18132,9 @@ define <4 x i64> @ult_53_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_53_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_53_v4i64:
@@ -17774,8 +18152,9 @@ define <4 x i64> @ult_53_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 53, i64 53, i64 53, i64 53>
@@ -17837,8 +18216,9 @@ define <4 x i64> @ugt_53_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_53_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_53_v4i64:
@@ -17856,8 +18236,9 @@ define <4 x i64> @ugt_53_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 53, i64 53, i64 53, i64 53>
@@ -17919,8 +18300,9 @@ define <4 x i64> @ult_54_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_54_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_54_v4i64:
@@ -17938,8 +18320,9 @@ define <4 x i64> @ult_54_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 54, i64 54, i64 54, i64 54>
@@ -18001,8 +18384,9 @@ define <4 x i64> @ugt_54_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_54_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_54_v4i64:
@@ -18020,8 +18404,9 @@ define <4 x i64> @ugt_54_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 54, i64 54, i64 54, i64 54>
@@ -18083,8 +18468,9 @@ define <4 x i64> @ult_55_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_55_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_55_v4i64:
@@ -18102,8 +18488,9 @@ define <4 x i64> @ult_55_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 55, i64 55, i64 55, i64 55>
@@ -18165,8 +18552,9 @@ define <4 x i64> @ugt_55_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_55_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_55_v4i64:
@@ -18184,8 +18572,9 @@ define <4 x i64> @ugt_55_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 55, i64 55, i64 55, i64 55>
@@ -18247,8 +18636,9 @@ define <4 x i64> @ult_56_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_56_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_56_v4i64:
@@ -18266,8 +18656,9 @@ define <4 x i64> @ult_56_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 56, i64 56, i64 56, i64 56>
@@ -18329,8 +18720,9 @@ define <4 x i64> @ugt_56_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_56_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_56_v4i64:
@@ -18348,8 +18740,9 @@ define <4 x i64> @ugt_56_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 56, i64 56, i64 56, i64 56>
@@ -18411,8 +18804,9 @@ define <4 x i64> @ult_57_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_57_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_57_v4i64:
@@ -18430,8 +18824,9 @@ define <4 x i64> @ult_57_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 57, i64 57, i64 57, i64 57>
@@ -18493,8 +18888,9 @@ define <4 x i64> @ugt_57_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_57_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_57_v4i64:
@@ -18512,8 +18908,9 @@ define <4 x i64> @ugt_57_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 57, i64 57, i64 57, i64 57>
@@ -18575,8 +18972,9 @@ define <4 x i64> @ult_58_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_58_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_58_v4i64:
@@ -18594,8 +18992,9 @@ define <4 x i64> @ult_58_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 58, i64 58, i64 58, i64 58>
@@ -18657,8 +19056,9 @@ define <4 x i64> @ugt_58_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_58_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_58_v4i64:
@@ -18676,8 +19076,9 @@ define <4 x i64> @ugt_58_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 58, i64 58, i64 58, i64 58>
@@ -18739,8 +19140,9 @@ define <4 x i64> @ult_59_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_59_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_59_v4i64:
@@ -18758,8 +19160,9 @@ define <4 x i64> @ult_59_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 59, i64 59, i64 59, i64 59>
@@ -18821,8 +19224,9 @@ define <4 x i64> @ugt_59_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_59_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_59_v4i64:
@@ -18840,8 +19244,9 @@ define <4 x i64> @ugt_59_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 59, i64 59, i64 59, i64 59>
@@ -18903,8 +19308,9 @@ define <4 x i64> @ult_60_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_60_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_60_v4i64:
@@ -18922,8 +19328,9 @@ define <4 x i64> @ult_60_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 60, i64 60, i64 60, i64 60>
@@ -18985,8 +19392,9 @@ define <4 x i64> @ugt_60_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_60_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_60_v4i64:
@@ -19004,8 +19412,9 @@ define <4 x i64> @ugt_60_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 60, i64 60, i64 60, i64 60>
@@ -19067,8 +19476,9 @@ define <4 x i64> @ult_61_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_61_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_61_v4i64:
@@ -19086,8 +19496,9 @@ define <4 x i64> @ult_61_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 61, i64 61, i64 61, i64 61>
@@ -19149,8 +19560,9 @@ define <4 x i64> @ugt_61_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_61_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_61_v4i64:
@@ -19168,8 +19580,9 @@ define <4 x i64> @ugt_61_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 61, i64 61, i64 61, i64 61>
@@ -19231,8 +19644,9 @@ define <4 x i64> @ult_62_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_62_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_62_v4i64:
@@ -19250,8 +19664,9 @@ define <4 x i64> @ult_62_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 62, i64 62, i64 62, i64 62>
@@ -19313,8 +19728,9 @@ define <4 x i64> @ugt_62_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ugt_62_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ugt_62_v4i64:
@@ -19332,8 +19748,9 @@ define <4 x i64> @ugt_62_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62]
-; BITALG-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ugt <4 x i64> %2, <i64 62, i64 62, i64 62, i64 62>
@@ -19395,8 +19812,9 @@ define <4 x i64> @ult_63_v4i64(<4 x i64> %0) {
 ; AVX512VPOPCNTDQVL-LABEL: ult_63_v4i64:
 ; AVX512VPOPCNTDQVL:       # %bb.0:
 ; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
-; AVX512VPOPCNTDQVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63]
-; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VPOPCNTDQVL-NEXT:    retq
 ;
 ; BITALG_NOVLX-LABEL: ult_63_v4i64:
@@ -19414,8 +19832,9 @@ define <4 x i64> @ult_63_v4i64(<4 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
-; BITALG-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63]
-; BITALG-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; BITALG-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; BITALG-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0)
   %3 = icmp ult <4 x i64> %2, <i64 63, i64 63, i64 63, i64 63>

diff --git a/llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll
index ef17822b1cf9..820f0f0b723e 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll
@@ -56,7 +56,7 @@ define <64 x i8> @ugt_1_v64i8(<64 x i8> %0) {
 ; BITALG-LABEL: ugt_1_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -113,7 +113,7 @@ define <64 x i8> @ult_2_v64i8(<64 x i8> %0) {
 ; BITALG-LABEL: ult_2_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -140,9 +140,11 @@ define <64 x i8> @ugt_2_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -156,7 +158,7 @@ define <64 x i8> @ugt_2_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -177,9 +179,11 @@ define <64 x i8> @ugt_2_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -193,14 +197,14 @@ define <64 x i8> @ugt_2_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_2_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -227,9 +231,11 @@ define <64 x i8> @ult_3_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX512F-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -243,7 +249,7 @@ define <64 x i8> @ult_3_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -264,9 +270,11 @@ define <64 x i8> @ult_3_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -280,14 +288,14 @@ define <64 x i8> @ult_3_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_3_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -314,9 +322,11 @@ define <64 x i8> @ugt_3_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -330,7 +340,7 @@ define <64 x i8> @ugt_3_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -351,9 +361,11 @@ define <64 x i8> @ugt_3_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -367,14 +379,14 @@ define <64 x i8> @ugt_3_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_3_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -401,9 +413,11 @@ define <64 x i8> @ult_4_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; AVX512F-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -417,7 +431,7 @@ define <64 x i8> @ult_4_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -438,9 +452,11 @@ define <64 x i8> @ult_4_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -454,14 +470,14 @@ define <64 x i8> @ult_4_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_4_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -488,9 +504,11 @@ define <64 x i8> @ugt_4_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -504,7 +522,7 @@ define <64 x i8> @ugt_4_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -525,9 +543,11 @@ define <64 x i8> @ugt_4_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -541,14 +561,14 @@ define <64 x i8> @ugt_4_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_4_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -575,9 +595,11 @@ define <64 x i8> @ult_5_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX512F-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -591,7 +613,7 @@ define <64 x i8> @ult_5_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -612,9 +634,11 @@ define <64 x i8> @ult_5_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -628,14 +652,14 @@ define <64 x i8> @ult_5_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_5_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -662,9 +686,11 @@ define <64 x i8> @ugt_5_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -678,7 +704,7 @@ define <64 x i8> @ugt_5_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -699,9 +725,11 @@ define <64 x i8> @ugt_5_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -715,14 +743,14 @@ define <64 x i8> @ugt_5_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_5_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -749,9 +777,11 @@ define <64 x i8> @ult_6_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; AVX512F-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -765,7 +795,7 @@ define <64 x i8> @ult_6_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -786,9 +816,11 @@ define <64 x i8> @ult_6_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -802,14 +834,14 @@ define <64 x i8> @ult_6_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_6_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -836,9 +868,11 @@ define <64 x i8> @ugt_6_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -852,7 +886,7 @@ define <64 x i8> @ugt_6_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -873,9 +907,11 @@ define <64 x i8> @ugt_6_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -889,14 +925,14 @@ define <64 x i8> @ugt_6_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_6_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -923,9 +959,11 @@ define <64 x i8> @ult_7_v64i8(<64 x i8> %0) {
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; AVX512F-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -939,7 +977,7 @@ define <64 x i8> @ult_7_v64i8(<64 x i8> %0) {
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -960,9 +998,11 @@ define <64 x i8> @ult_7_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpgtb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpminub %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NOBW-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
 ;
@@ -976,14 +1016,14 @@ define <64 x i8> @ult_7_v64i8(<64 x i8> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_7_v64i8:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2b %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0)
@@ -1042,7 +1082,7 @@ define <32 x i16> @ugt_1_v32i16(<32 x i16> %0) {
 ; BITALG-LABEL: ugt_1_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1099,7 +1139,7 @@ define <32 x i16> @ult_2_v32i16(<32 x i16> %0) {
 ; BITALG-LABEL: ult_2_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1151,7 +1191,7 @@ define <32 x i16> @ugt_2_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1183,14 +1223,14 @@ define <32 x i16> @ugt_2_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_2_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1242,7 +1282,7 @@ define <32 x i16> @ult_3_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1274,14 +1314,14 @@ define <32 x i16> @ult_3_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_3_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1333,7 +1373,7 @@ define <32 x i16> @ugt_3_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1365,14 +1405,14 @@ define <32 x i16> @ugt_3_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_3_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1424,7 +1464,7 @@ define <32 x i16> @ult_4_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1456,14 +1496,14 @@ define <32 x i16> @ult_4_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_4_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1515,7 +1555,7 @@ define <32 x i16> @ugt_4_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1547,14 +1587,14 @@ define <32 x i16> @ugt_4_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_4_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1606,7 +1646,7 @@ define <32 x i16> @ult_5_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1638,14 +1678,14 @@ define <32 x i16> @ult_5_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_5_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1697,7 +1737,7 @@ define <32 x i16> @ugt_5_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1729,14 +1769,14 @@ define <32 x i16> @ugt_5_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_5_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1788,7 +1828,7 @@ define <32 x i16> @ult_6_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1820,14 +1860,14 @@ define <32 x i16> @ult_6_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_6_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1879,7 +1919,7 @@ define <32 x i16> @ugt_6_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -1911,14 +1951,14 @@ define <32 x i16> @ugt_6_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_6_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -1970,7 +2010,7 @@ define <32 x i16> @ult_7_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2002,14 +2042,14 @@ define <32 x i16> @ult_7_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_7_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2061,7 +2101,7 @@ define <32 x i16> @ugt_7_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2093,14 +2133,14 @@ define <32 x i16> @ugt_7_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_7_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2152,7 +2192,7 @@ define <32 x i16> @ult_8_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2184,14 +2224,14 @@ define <32 x i16> @ult_8_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_8_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2243,7 +2283,7 @@ define <32 x i16> @ugt_8_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2275,14 +2315,14 @@ define <32 x i16> @ugt_8_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_8_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2334,7 +2374,7 @@ define <32 x i16> @ult_9_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2366,14 +2406,14 @@ define <32 x i16> @ult_9_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_9_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2425,7 +2465,7 @@ define <32 x i16> @ugt_9_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2457,14 +2497,14 @@ define <32 x i16> @ugt_9_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_9_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2516,7 +2556,7 @@ define <32 x i16> @ult_10_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2548,14 +2588,14 @@ define <32 x i16> @ult_10_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_10_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2607,7 +2647,7 @@ define <32 x i16> @ugt_10_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2639,14 +2679,14 @@ define <32 x i16> @ugt_10_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_10_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2698,7 +2738,7 @@ define <32 x i16> @ult_11_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2730,14 +2770,14 @@ define <32 x i16> @ult_11_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_11_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2789,7 +2829,7 @@ define <32 x i16> @ugt_11_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2821,14 +2861,14 @@ define <32 x i16> @ugt_11_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_11_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2880,7 +2920,7 @@ define <32 x i16> @ult_12_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -2912,14 +2952,14 @@ define <32 x i16> @ult_12_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_12_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -2971,7 +3011,7 @@ define <32 x i16> @ugt_12_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -3003,14 +3043,14 @@ define <32 x i16> @ugt_12_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_12_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -3062,7 +3102,7 @@ define <32 x i16> @ult_13_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -3094,14 +3134,14 @@ define <32 x i16> @ult_13_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_13_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -3153,7 +3193,7 @@ define <32 x i16> @ugt_13_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -3185,14 +3225,14 @@ define <32 x i16> @ugt_13_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_13_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -3244,7 +3284,7 @@ define <32 x i16> @ult_14_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -3276,14 +3316,14 @@ define <32 x i16> @ult_14_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_14_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -3335,7 +3375,7 @@ define <32 x i16> @ugt_14_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -3367,14 +3407,14 @@ define <32 x i16> @ugt_14_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ugt_14_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -3426,7 +3466,7 @@ define <32 x i16> @ult_15_v32i16(<32 x i16> %0) {
 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -3458,14 +3498,14 @@ define <32 x i16> @ult_15_v32i16(<32 x i16> %0) {
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512VPOPCNTDQ-BW-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; AVX512VPOPCNTDQ-BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
 ;
 ; BITALG-LABEL: ult_15_v32i16:
 ; BITALG:       # %bb.0:
 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; BITALG-NEXT:    vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
 ; BITALG-NEXT:    vpmovm2w %k0, %zmm0
 ; BITALG-NEXT:    retq
   %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0)
@@ -3494,7 +3534,7 @@ define <16 x i32> @ugt_1_v16i32(<16 x i32> %0) {
 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -3531,7 +3571,7 @@ define <16 x i32> @ult_2_v16i32(<16 x i32> %0) {
 ; AVX512VPOPCNTDQ-LABEL: ult_2_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -3578,7 +3618,7 @@ define <16 x i32> @ugt_2_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -3598,14 +3638,14 @@ define <16 x i32> @ugt_2_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -3618,7 +3658,7 @@ define <16 x i32> @ugt_2_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -3657,7 +3697,7 @@ define <16 x i32> @ult_3_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -3677,14 +3717,14 @@ define <16 x i32> @ult_3_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_3_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -3697,7 +3737,7 @@ define <16 x i32> @ult_3_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -3736,7 +3776,7 @@ define <16 x i32> @ugt_3_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -3756,14 +3796,14 @@ define <16 x i32> @ugt_3_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -3776,7 +3816,7 @@ define <16 x i32> @ugt_3_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -3815,7 +3855,7 @@ define <16 x i32> @ult_4_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -3835,14 +3875,14 @@ define <16 x i32> @ult_4_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_4_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -3855,7 +3895,7 @@ define <16 x i32> @ult_4_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -3894,7 +3934,7 @@ define <16 x i32> @ugt_4_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -3914,14 +3954,14 @@ define <16 x i32> @ugt_4_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -3934,7 +3974,7 @@ define <16 x i32> @ugt_4_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -3973,7 +4013,7 @@ define <16 x i32> @ult_5_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -3993,14 +4033,14 @@ define <16 x i32> @ult_5_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_5_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4013,7 +4053,7 @@ define <16 x i32> @ult_5_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4052,7 +4092,7 @@ define <16 x i32> @ugt_5_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4072,14 +4112,14 @@ define <16 x i32> @ugt_5_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4092,7 +4132,7 @@ define <16 x i32> @ugt_5_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4131,7 +4171,7 @@ define <16 x i32> @ult_6_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4151,14 +4191,14 @@ define <16 x i32> @ult_6_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_6_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4171,7 +4211,7 @@ define <16 x i32> @ult_6_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4210,7 +4250,7 @@ define <16 x i32> @ugt_6_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4230,14 +4270,14 @@ define <16 x i32> @ugt_6_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4250,7 +4290,7 @@ define <16 x i32> @ugt_6_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4289,7 +4329,7 @@ define <16 x i32> @ult_7_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4309,14 +4349,14 @@ define <16 x i32> @ult_7_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_7_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4329,7 +4369,7 @@ define <16 x i32> @ult_7_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4368,7 +4408,7 @@ define <16 x i32> @ugt_7_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4388,14 +4428,14 @@ define <16 x i32> @ugt_7_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4408,7 +4448,7 @@ define <16 x i32> @ugt_7_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4447,7 +4487,7 @@ define <16 x i32> @ult_8_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4467,14 +4507,14 @@ define <16 x i32> @ult_8_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_8_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4487,7 +4527,7 @@ define <16 x i32> @ult_8_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4526,7 +4566,7 @@ define <16 x i32> @ugt_8_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4546,14 +4586,14 @@ define <16 x i32> @ugt_8_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4566,7 +4606,7 @@ define <16 x i32> @ugt_8_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4605,7 +4645,7 @@ define <16 x i32> @ult_9_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4625,14 +4665,14 @@ define <16 x i32> @ult_9_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_9_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4645,7 +4685,7 @@ define <16 x i32> @ult_9_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4684,7 +4724,7 @@ define <16 x i32> @ugt_9_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4704,14 +4744,14 @@ define <16 x i32> @ugt_9_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_9_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4724,7 +4764,7 @@ define <16 x i32> @ugt_9_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4763,7 +4803,7 @@ define <16 x i32> @ult_10_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4783,14 +4823,14 @@ define <16 x i32> @ult_10_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_10_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4803,7 +4843,7 @@ define <16 x i32> @ult_10_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4842,7 +4882,7 @@ define <16 x i32> @ugt_10_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4862,14 +4902,14 @@ define <16 x i32> @ugt_10_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_10_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4882,7 +4922,7 @@ define <16 x i32> @ugt_10_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -4921,7 +4961,7 @@ define <16 x i32> @ult_11_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -4941,14 +4981,14 @@ define <16 x i32> @ult_11_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_11_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -4961,7 +5001,7 @@ define <16 x i32> @ult_11_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5000,7 +5040,7 @@ define <16 x i32> @ugt_11_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5020,14 +5060,14 @@ define <16 x i32> @ugt_11_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5040,7 +5080,7 @@ define <16 x i32> @ugt_11_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5079,7 +5119,7 @@ define <16 x i32> @ult_12_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5099,14 +5139,14 @@ define <16 x i32> @ult_12_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_12_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5119,7 +5159,7 @@ define <16 x i32> @ult_12_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5158,7 +5198,7 @@ define <16 x i32> @ugt_12_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5178,14 +5218,14 @@ define <16 x i32> @ugt_12_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5198,7 +5238,7 @@ define <16 x i32> @ugt_12_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5237,7 +5277,7 @@ define <16 x i32> @ult_13_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5257,14 +5297,14 @@ define <16 x i32> @ult_13_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_13_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5277,7 +5317,7 @@ define <16 x i32> @ult_13_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5316,7 +5356,7 @@ define <16 x i32> @ugt_13_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5336,14 +5376,14 @@ define <16 x i32> @ugt_13_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5356,7 +5396,7 @@ define <16 x i32> @ugt_13_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5395,7 +5435,7 @@ define <16 x i32> @ult_14_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5415,14 +5455,14 @@ define <16 x i32> @ult_14_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_14_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5435,7 +5475,7 @@ define <16 x i32> @ult_14_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5474,7 +5514,7 @@ define <16 x i32> @ugt_14_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5494,14 +5534,14 @@ define <16 x i32> @ugt_14_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5514,7 +5554,7 @@ define <16 x i32> @ugt_14_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5553,7 +5593,7 @@ define <16 x i32> @ult_15_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5573,14 +5613,14 @@ define <16 x i32> @ult_15_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_15_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5593,7 +5633,7 @@ define <16 x i32> @ult_15_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5632,7 +5672,7 @@ define <16 x i32> @ugt_15_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5652,14 +5692,14 @@ define <16 x i32> @ugt_15_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_15_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5672,7 +5712,7 @@ define <16 x i32> @ugt_15_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5711,7 +5751,7 @@ define <16 x i32> @ult_16_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5731,14 +5771,14 @@ define <16 x i32> @ult_16_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_16_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5751,7 +5791,7 @@ define <16 x i32> @ult_16_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5790,7 +5830,7 @@ define <16 x i32> @ugt_16_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5810,14 +5850,14 @@ define <16 x i32> @ugt_16_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_16_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5830,7 +5870,7 @@ define <16 x i32> @ugt_16_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5869,7 +5909,7 @@ define <16 x i32> @ult_17_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5889,14 +5929,14 @@ define <16 x i32> @ult_17_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_17_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5909,7 +5949,7 @@ define <16 x i32> @ult_17_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -5948,7 +5988,7 @@ define <16 x i32> @ugt_17_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -5968,14 +6008,14 @@ define <16 x i32> @ugt_17_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_17_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -5988,7 +6028,7 @@ define <16 x i32> @ugt_17_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6027,7 +6067,7 @@ define <16 x i32> @ult_18_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6047,14 +6087,14 @@ define <16 x i32> @ult_18_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_18_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6067,7 +6107,7 @@ define <16 x i32> @ult_18_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6106,7 +6146,7 @@ define <16 x i32> @ugt_18_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6126,14 +6166,14 @@ define <16 x i32> @ugt_18_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_18_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6146,7 +6186,7 @@ define <16 x i32> @ugt_18_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6185,7 +6225,7 @@ define <16 x i32> @ult_19_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6205,14 +6245,14 @@ define <16 x i32> @ult_19_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_19_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6225,7 +6265,7 @@ define <16 x i32> @ult_19_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6264,7 +6304,7 @@ define <16 x i32> @ugt_19_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6284,14 +6324,14 @@ define <16 x i32> @ugt_19_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_19_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6304,7 +6344,7 @@ define <16 x i32> @ugt_19_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6343,7 +6383,7 @@ define <16 x i32> @ult_20_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6363,14 +6403,14 @@ define <16 x i32> @ult_20_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_20_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6383,7 +6423,7 @@ define <16 x i32> @ult_20_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6422,7 +6462,7 @@ define <16 x i32> @ugt_20_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6442,14 +6482,14 @@ define <16 x i32> @ugt_20_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_20_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6462,7 +6502,7 @@ define <16 x i32> @ugt_20_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6501,7 +6541,7 @@ define <16 x i32> @ult_21_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6521,14 +6561,14 @@ define <16 x i32> @ult_21_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_21_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6541,7 +6581,7 @@ define <16 x i32> @ult_21_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6580,7 +6620,7 @@ define <16 x i32> @ugt_21_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6600,14 +6640,14 @@ define <16 x i32> @ugt_21_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_21_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6620,7 +6660,7 @@ define <16 x i32> @ugt_21_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6659,7 +6699,7 @@ define <16 x i32> @ult_22_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6679,14 +6719,14 @@ define <16 x i32> @ult_22_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_22_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6699,7 +6739,7 @@ define <16 x i32> @ult_22_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6738,7 +6778,7 @@ define <16 x i32> @ugt_22_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6758,14 +6798,14 @@ define <16 x i32> @ugt_22_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_22_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6778,7 +6818,7 @@ define <16 x i32> @ugt_22_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6817,7 +6857,7 @@ define <16 x i32> @ult_23_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6837,14 +6877,14 @@ define <16 x i32> @ult_23_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_23_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6857,7 +6897,7 @@ define <16 x i32> @ult_23_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6896,7 +6936,7 @@ define <16 x i32> @ugt_23_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6916,14 +6956,14 @@ define <16 x i32> @ugt_23_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_23_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -6936,7 +6976,7 @@ define <16 x i32> @ugt_23_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -6975,7 +7015,7 @@ define <16 x i32> @ult_24_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -6995,14 +7035,14 @@ define <16 x i32> @ult_24_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_24_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7015,7 +7055,7 @@ define <16 x i32> @ult_24_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7054,7 +7094,7 @@ define <16 x i32> @ugt_24_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7074,14 +7114,14 @@ define <16 x i32> @ugt_24_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_24_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7094,7 +7134,7 @@ define <16 x i32> @ugt_24_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7133,7 +7173,7 @@ define <16 x i32> @ult_25_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7153,14 +7193,14 @@ define <16 x i32> @ult_25_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_25_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7173,7 +7213,7 @@ define <16 x i32> @ult_25_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7212,7 +7252,7 @@ define <16 x i32> @ugt_25_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7232,14 +7272,14 @@ define <16 x i32> @ugt_25_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_25_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7252,7 +7292,7 @@ define <16 x i32> @ugt_25_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7291,7 +7331,7 @@ define <16 x i32> @ult_26_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7311,14 +7351,14 @@ define <16 x i32> @ult_26_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_26_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7331,7 +7371,7 @@ define <16 x i32> @ult_26_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7370,7 +7410,7 @@ define <16 x i32> @ugt_26_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7390,14 +7430,14 @@ define <16 x i32> @ugt_26_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_26_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7410,7 +7450,7 @@ define <16 x i32> @ugt_26_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7449,7 +7489,7 @@ define <16 x i32> @ult_27_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7469,14 +7509,14 @@ define <16 x i32> @ult_27_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_27_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7489,7 +7529,7 @@ define <16 x i32> @ult_27_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7528,7 +7568,7 @@ define <16 x i32> @ugt_27_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7548,14 +7588,14 @@ define <16 x i32> @ugt_27_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_27_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7568,7 +7608,7 @@ define <16 x i32> @ugt_27_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7607,7 +7647,7 @@ define <16 x i32> @ult_28_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7627,14 +7667,14 @@ define <16 x i32> @ult_28_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_28_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7647,7 +7687,7 @@ define <16 x i32> @ult_28_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7686,7 +7726,7 @@ define <16 x i32> @ugt_28_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7706,14 +7746,14 @@ define <16 x i32> @ugt_28_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_28_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7726,7 +7766,7 @@ define <16 x i32> @ugt_28_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7765,7 +7805,7 @@ define <16 x i32> @ult_29_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7785,14 +7825,14 @@ define <16 x i32> @ult_29_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_29_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7805,7 +7845,7 @@ define <16 x i32> @ult_29_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7844,7 +7884,7 @@ define <16 x i32> @ugt_29_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7864,14 +7904,14 @@ define <16 x i32> @ugt_29_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_29_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7884,7 +7924,7 @@ define <16 x i32> @ugt_29_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -7923,7 +7963,7 @@ define <16 x i32> @ult_30_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -7943,14 +7983,14 @@ define <16 x i32> @ult_30_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_30_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -7963,7 +8003,7 @@ define <16 x i32> @ult_30_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -8002,7 +8042,7 @@ define <16 x i32> @ugt_30_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8022,14 +8062,14 @@ define <16 x i32> @ugt_30_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_30_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8042,7 +8082,7 @@ define <16 x i32> @ugt_30_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -8081,7 +8121,7 @@ define <16 x i32> @ult_31_v16i32(<16 x i32> %0) {
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8101,14 +8141,14 @@ define <16 x i32> @ult_31_v16i32(<16 x i32> %0) {
 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_31_v16i32:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8121,7 +8161,7 @@ define <16 x i32> @ult_31_v16i32(<16 x i32> %0) {
 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0)
@@ -8150,7 +8190,7 @@ define <8 x i64> @ugt_1_v8i64(<8 x i64> %0) {
 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8187,7 +8227,7 @@ define <8 x i64> @ult_2_v8i64(<8 x i64> %0) {
 ; AVX512VPOPCNTDQ-LABEL: ult_2_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8226,7 +8266,7 @@ define <8 x i64> @ugt_2_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8242,14 +8282,14 @@ define <8 x i64> @ugt_2_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8258,7 +8298,7 @@ define <8 x i64> @ugt_2_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8289,7 +8329,7 @@ define <8 x i64> @ult_3_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8305,14 +8345,14 @@ define <8 x i64> @ult_3_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_3_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8321,7 +8361,7 @@ define <8 x i64> @ult_3_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8352,7 +8392,7 @@ define <8 x i64> @ugt_3_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8368,14 +8408,14 @@ define <8 x i64> @ugt_3_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8384,7 +8424,7 @@ define <8 x i64> @ugt_3_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8415,7 +8455,7 @@ define <8 x i64> @ult_4_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8431,14 +8471,14 @@ define <8 x i64> @ult_4_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_4_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8447,7 +8487,7 @@ define <8 x i64> @ult_4_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8478,7 +8518,7 @@ define <8 x i64> @ugt_4_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8494,14 +8534,14 @@ define <8 x i64> @ugt_4_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8510,7 +8550,7 @@ define <8 x i64> @ugt_4_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8541,7 +8581,7 @@ define <8 x i64> @ult_5_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8557,14 +8597,14 @@ define <8 x i64> @ult_5_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_5_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8573,7 +8613,7 @@ define <8 x i64> @ult_5_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8604,7 +8644,7 @@ define <8 x i64> @ugt_5_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8620,14 +8660,14 @@ define <8 x i64> @ugt_5_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8636,7 +8676,7 @@ define <8 x i64> @ugt_5_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8667,7 +8707,7 @@ define <8 x i64> @ult_6_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8683,14 +8723,14 @@ define <8 x i64> @ult_6_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_6_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8699,7 +8739,7 @@ define <8 x i64> @ult_6_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8730,7 +8770,7 @@ define <8 x i64> @ugt_6_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8746,14 +8786,14 @@ define <8 x i64> @ugt_6_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8762,7 +8802,7 @@ define <8 x i64> @ugt_6_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8793,7 +8833,7 @@ define <8 x i64> @ult_7_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8809,14 +8849,14 @@ define <8 x i64> @ult_7_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_7_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8825,7 +8865,7 @@ define <8 x i64> @ult_7_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8856,7 +8896,7 @@ define <8 x i64> @ugt_7_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8872,14 +8912,14 @@ define <8 x i64> @ugt_7_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8888,7 +8928,7 @@ define <8 x i64> @ugt_7_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8919,7 +8959,7 @@ define <8 x i64> @ult_8_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8935,14 +8975,14 @@ define <8 x i64> @ult_8_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_8_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -8951,7 +8991,7 @@ define <8 x i64> @ult_8_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -8982,7 +9022,7 @@ define <8 x i64> @ugt_8_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -8998,14 +9038,14 @@ define <8 x i64> @ugt_8_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9014,7 +9054,7 @@ define <8 x i64> @ugt_8_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9045,7 +9085,7 @@ define <8 x i64> @ult_9_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9061,14 +9101,14 @@ define <8 x i64> @ult_9_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_9_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9077,7 +9117,7 @@ define <8 x i64> @ult_9_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9108,7 +9148,7 @@ define <8 x i64> @ugt_9_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9124,14 +9164,14 @@ define <8 x i64> @ugt_9_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9140,7 +9180,7 @@ define <8 x i64> @ugt_9_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9171,7 +9211,7 @@ define <8 x i64> @ult_10_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9187,14 +9227,14 @@ define <8 x i64> @ult_10_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_10_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9203,7 +9243,7 @@ define <8 x i64> @ult_10_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9234,7 +9274,7 @@ define <8 x i64> @ugt_10_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9250,14 +9290,14 @@ define <8 x i64> @ugt_10_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9266,7 +9306,7 @@ define <8 x i64> @ugt_10_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9297,7 +9337,7 @@ define <8 x i64> @ult_11_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9313,14 +9353,14 @@ define <8 x i64> @ult_11_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_11_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9329,7 +9369,7 @@ define <8 x i64> @ult_11_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9360,7 +9400,7 @@ define <8 x i64> @ugt_11_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9376,14 +9416,14 @@ define <8 x i64> @ugt_11_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9392,7 +9432,7 @@ define <8 x i64> @ugt_11_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9423,7 +9463,7 @@ define <8 x i64> @ult_12_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9439,14 +9479,14 @@ define <8 x i64> @ult_12_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_12_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9455,7 +9495,7 @@ define <8 x i64> @ult_12_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9486,7 +9526,7 @@ define <8 x i64> @ugt_12_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9502,14 +9542,14 @@ define <8 x i64> @ugt_12_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9518,7 +9558,7 @@ define <8 x i64> @ugt_12_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9549,7 +9589,7 @@ define <8 x i64> @ult_13_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9565,14 +9605,14 @@ define <8 x i64> @ult_13_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_13_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9581,7 +9621,7 @@ define <8 x i64> @ult_13_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9612,7 +9652,7 @@ define <8 x i64> @ugt_13_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9628,14 +9668,14 @@ define <8 x i64> @ugt_13_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9644,7 +9684,7 @@ define <8 x i64> @ugt_13_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9675,7 +9715,7 @@ define <8 x i64> @ult_14_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9691,14 +9731,14 @@ define <8 x i64> @ult_14_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_14_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9707,7 +9747,7 @@ define <8 x i64> @ult_14_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9738,7 +9778,7 @@ define <8 x i64> @ugt_14_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9754,14 +9794,14 @@ define <8 x i64> @ugt_14_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9770,7 +9810,7 @@ define <8 x i64> @ugt_14_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9801,7 +9841,7 @@ define <8 x i64> @ult_15_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9817,14 +9857,14 @@ define <8 x i64> @ult_15_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_15_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9833,7 +9873,7 @@ define <8 x i64> @ult_15_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9864,7 +9904,7 @@ define <8 x i64> @ugt_15_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9880,14 +9920,14 @@ define <8 x i64> @ugt_15_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_15_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9896,7 +9936,7 @@ define <8 x i64> @ugt_15_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9927,7 +9967,7 @@ define <8 x i64> @ult_16_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -9943,14 +9983,14 @@ define <8 x i64> @ult_16_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_16_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -9959,7 +9999,7 @@ define <8 x i64> @ult_16_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -9990,7 +10030,7 @@ define <8 x i64> @ugt_16_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10006,14 +10046,14 @@ define <8 x i64> @ugt_16_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_16_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10022,7 +10062,7 @@ define <8 x i64> @ugt_16_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10053,7 +10093,7 @@ define <8 x i64> @ult_17_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10069,14 +10109,14 @@ define <8 x i64> @ult_17_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_17_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10085,7 +10125,7 @@ define <8 x i64> @ult_17_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10116,7 +10156,7 @@ define <8 x i64> @ugt_17_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10132,14 +10172,14 @@ define <8 x i64> @ugt_17_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_17_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10148,7 +10188,7 @@ define <8 x i64> @ugt_17_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10179,7 +10219,7 @@ define <8 x i64> @ult_18_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10195,14 +10235,14 @@ define <8 x i64> @ult_18_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_18_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10211,7 +10251,7 @@ define <8 x i64> @ult_18_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10242,7 +10282,7 @@ define <8 x i64> @ugt_18_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10258,14 +10298,14 @@ define <8 x i64> @ugt_18_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_18_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10274,7 +10314,7 @@ define <8 x i64> @ugt_18_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10305,7 +10345,7 @@ define <8 x i64> @ult_19_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10321,14 +10361,14 @@ define <8 x i64> @ult_19_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_19_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10337,7 +10377,7 @@ define <8 x i64> @ult_19_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10368,7 +10408,7 @@ define <8 x i64> @ugt_19_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10384,14 +10424,14 @@ define <8 x i64> @ugt_19_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_19_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10400,7 +10440,7 @@ define <8 x i64> @ugt_19_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10431,7 +10471,7 @@ define <8 x i64> @ult_20_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10447,14 +10487,14 @@ define <8 x i64> @ult_20_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_20_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10463,7 +10503,7 @@ define <8 x i64> @ult_20_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10494,7 +10534,7 @@ define <8 x i64> @ugt_20_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10510,14 +10550,14 @@ define <8 x i64> @ugt_20_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_20_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10526,7 +10566,7 @@ define <8 x i64> @ugt_20_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10557,7 +10597,7 @@ define <8 x i64> @ult_21_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10573,14 +10613,14 @@ define <8 x i64> @ult_21_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_21_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10589,7 +10629,7 @@ define <8 x i64> @ult_21_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10620,7 +10660,7 @@ define <8 x i64> @ugt_21_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10636,14 +10676,14 @@ define <8 x i64> @ugt_21_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_21_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10652,7 +10692,7 @@ define <8 x i64> @ugt_21_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10683,7 +10723,7 @@ define <8 x i64> @ult_22_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10699,14 +10739,14 @@ define <8 x i64> @ult_22_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_22_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10715,7 +10755,7 @@ define <8 x i64> @ult_22_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10746,7 +10786,7 @@ define <8 x i64> @ugt_22_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10762,14 +10802,14 @@ define <8 x i64> @ugt_22_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_22_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10778,7 +10818,7 @@ define <8 x i64> @ugt_22_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10809,7 +10849,7 @@ define <8 x i64> @ult_23_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10825,14 +10865,14 @@ define <8 x i64> @ult_23_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_23_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10841,7 +10881,7 @@ define <8 x i64> @ult_23_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10872,7 +10912,7 @@ define <8 x i64> @ugt_23_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10888,14 +10928,14 @@ define <8 x i64> @ugt_23_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_23_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10904,7 +10944,7 @@ define <8 x i64> @ugt_23_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10935,7 +10975,7 @@ define <8 x i64> @ult_24_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -10951,14 +10991,14 @@ define <8 x i64> @ult_24_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_24_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -10967,7 +11007,7 @@ define <8 x i64> @ult_24_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -10998,7 +11038,7 @@ define <8 x i64> @ugt_24_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11014,14 +11054,14 @@ define <8 x i64> @ugt_24_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_24_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11030,7 +11070,7 @@ define <8 x i64> @ugt_24_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11061,7 +11101,7 @@ define <8 x i64> @ult_25_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11077,14 +11117,14 @@ define <8 x i64> @ult_25_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_25_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11093,7 +11133,7 @@ define <8 x i64> @ult_25_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11124,7 +11164,7 @@ define <8 x i64> @ugt_25_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11140,14 +11180,14 @@ define <8 x i64> @ugt_25_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_25_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11156,7 +11196,7 @@ define <8 x i64> @ugt_25_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11187,7 +11227,7 @@ define <8 x i64> @ult_26_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11203,14 +11243,14 @@ define <8 x i64> @ult_26_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_26_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11219,7 +11259,7 @@ define <8 x i64> @ult_26_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11250,7 +11290,7 @@ define <8 x i64> @ugt_26_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11266,14 +11306,14 @@ define <8 x i64> @ugt_26_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_26_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11282,7 +11322,7 @@ define <8 x i64> @ugt_26_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11313,7 +11353,7 @@ define <8 x i64> @ult_27_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11329,14 +11369,14 @@ define <8 x i64> @ult_27_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_27_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11345,7 +11385,7 @@ define <8 x i64> @ult_27_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11376,7 +11416,7 @@ define <8 x i64> @ugt_27_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11392,14 +11432,14 @@ define <8 x i64> @ugt_27_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_27_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11408,7 +11448,7 @@ define <8 x i64> @ugt_27_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11439,7 +11479,7 @@ define <8 x i64> @ult_28_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11455,14 +11495,14 @@ define <8 x i64> @ult_28_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_28_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11471,7 +11511,7 @@ define <8 x i64> @ult_28_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11502,7 +11542,7 @@ define <8 x i64> @ugt_28_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11518,14 +11558,14 @@ define <8 x i64> @ugt_28_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_28_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11534,7 +11574,7 @@ define <8 x i64> @ugt_28_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11565,7 +11605,7 @@ define <8 x i64> @ult_29_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11581,14 +11621,14 @@ define <8 x i64> @ult_29_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_29_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11597,7 +11637,7 @@ define <8 x i64> @ult_29_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11628,7 +11668,7 @@ define <8 x i64> @ugt_29_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11644,14 +11684,14 @@ define <8 x i64> @ugt_29_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_29_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11660,7 +11700,7 @@ define <8 x i64> @ugt_29_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11691,7 +11731,7 @@ define <8 x i64> @ult_30_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11707,14 +11747,14 @@ define <8 x i64> @ult_30_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_30_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11723,7 +11763,7 @@ define <8 x i64> @ult_30_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11754,7 +11794,7 @@ define <8 x i64> @ugt_30_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11770,14 +11810,14 @@ define <8 x i64> @ugt_30_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_30_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11786,7 +11826,7 @@ define <8 x i64> @ugt_30_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11817,7 +11857,7 @@ define <8 x i64> @ult_31_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11833,14 +11873,14 @@ define <8 x i64> @ult_31_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_31_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11849,7 +11889,7 @@ define <8 x i64> @ult_31_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11880,7 +11920,7 @@ define <8 x i64> @ugt_31_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11896,14 +11936,14 @@ define <8 x i64> @ugt_31_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_31_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11912,7 +11952,7 @@ define <8 x i64> @ugt_31_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -11943,7 +11983,7 @@ define <8 x i64> @ult_32_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -11959,14 +11999,14 @@ define <8 x i64> @ult_32_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_32_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -11975,7 +12015,7 @@ define <8 x i64> @ult_32_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12006,7 +12046,7 @@ define <8 x i64> @ugt_32_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12022,14 +12062,14 @@ define <8 x i64> @ugt_32_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_32_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12038,7 +12078,7 @@ define <8 x i64> @ugt_32_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12069,7 +12109,7 @@ define <8 x i64> @ult_33_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12085,14 +12125,14 @@ define <8 x i64> @ult_33_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_33_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12101,7 +12141,7 @@ define <8 x i64> @ult_33_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12132,7 +12172,7 @@ define <8 x i64> @ugt_33_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12148,14 +12188,14 @@ define <8 x i64> @ugt_33_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_33_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12164,7 +12204,7 @@ define <8 x i64> @ugt_33_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12195,7 +12235,7 @@ define <8 x i64> @ult_34_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12211,14 +12251,14 @@ define <8 x i64> @ult_34_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_34_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12227,7 +12267,7 @@ define <8 x i64> @ult_34_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12258,7 +12298,7 @@ define <8 x i64> @ugt_34_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12274,14 +12314,14 @@ define <8 x i64> @ugt_34_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_34_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12290,7 +12330,7 @@ define <8 x i64> @ugt_34_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12321,7 +12361,7 @@ define <8 x i64> @ult_35_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12337,14 +12377,14 @@ define <8 x i64> @ult_35_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_35_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12353,7 +12393,7 @@ define <8 x i64> @ult_35_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12384,7 +12424,7 @@ define <8 x i64> @ugt_35_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12400,14 +12440,14 @@ define <8 x i64> @ugt_35_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_35_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12416,7 +12456,7 @@ define <8 x i64> @ugt_35_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12447,7 +12487,7 @@ define <8 x i64> @ult_36_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12463,14 +12503,14 @@ define <8 x i64> @ult_36_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_36_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12479,7 +12519,7 @@ define <8 x i64> @ult_36_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12510,7 +12550,7 @@ define <8 x i64> @ugt_36_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12526,14 +12566,14 @@ define <8 x i64> @ugt_36_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_36_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12542,7 +12582,7 @@ define <8 x i64> @ugt_36_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12573,7 +12613,7 @@ define <8 x i64> @ult_37_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12589,14 +12629,14 @@ define <8 x i64> @ult_37_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_37_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12605,7 +12645,7 @@ define <8 x i64> @ult_37_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12636,7 +12676,7 @@ define <8 x i64> @ugt_37_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12652,14 +12692,14 @@ define <8 x i64> @ugt_37_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_37_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12668,7 +12708,7 @@ define <8 x i64> @ugt_37_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12699,7 +12739,7 @@ define <8 x i64> @ult_38_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12715,14 +12755,14 @@ define <8 x i64> @ult_38_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_38_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12731,7 +12771,7 @@ define <8 x i64> @ult_38_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12762,7 +12802,7 @@ define <8 x i64> @ugt_38_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12778,14 +12818,14 @@ define <8 x i64> @ugt_38_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_38_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12794,7 +12834,7 @@ define <8 x i64> @ugt_38_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12825,7 +12865,7 @@ define <8 x i64> @ult_39_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12841,14 +12881,14 @@ define <8 x i64> @ult_39_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_39_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12857,7 +12897,7 @@ define <8 x i64> @ult_39_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12888,7 +12928,7 @@ define <8 x i64> @ugt_39_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12904,14 +12944,14 @@ define <8 x i64> @ugt_39_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_39_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12920,7 +12960,7 @@ define <8 x i64> @ugt_39_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -12951,7 +12991,7 @@ define <8 x i64> @ult_40_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -12967,14 +13007,14 @@ define <8 x i64> @ult_40_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_40_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -12983,7 +13023,7 @@ define <8 x i64> @ult_40_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13014,7 +13054,7 @@ define <8 x i64> @ugt_40_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13030,14 +13070,14 @@ define <8 x i64> @ugt_40_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_40_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13046,7 +13086,7 @@ define <8 x i64> @ugt_40_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13077,7 +13117,7 @@ define <8 x i64> @ult_41_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13093,14 +13133,14 @@ define <8 x i64> @ult_41_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_41_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13109,7 +13149,7 @@ define <8 x i64> @ult_41_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13140,7 +13180,7 @@ define <8 x i64> @ugt_41_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13156,14 +13196,14 @@ define <8 x i64> @ugt_41_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_41_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13172,7 +13212,7 @@ define <8 x i64> @ugt_41_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13203,7 +13243,7 @@ define <8 x i64> @ult_42_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13219,14 +13259,14 @@ define <8 x i64> @ult_42_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_42_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13235,7 +13275,7 @@ define <8 x i64> @ult_42_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13266,7 +13306,7 @@ define <8 x i64> @ugt_42_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13282,14 +13322,14 @@ define <8 x i64> @ugt_42_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_42_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13298,7 +13338,7 @@ define <8 x i64> @ugt_42_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13329,7 +13369,7 @@ define <8 x i64> @ult_43_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13345,14 +13385,14 @@ define <8 x i64> @ult_43_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_43_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13361,7 +13401,7 @@ define <8 x i64> @ult_43_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13392,7 +13432,7 @@ define <8 x i64> @ugt_43_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13408,14 +13448,14 @@ define <8 x i64> @ugt_43_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_43_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13424,7 +13464,7 @@ define <8 x i64> @ugt_43_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13455,7 +13495,7 @@ define <8 x i64> @ult_44_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13471,14 +13511,14 @@ define <8 x i64> @ult_44_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_44_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13487,7 +13527,7 @@ define <8 x i64> @ult_44_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13518,7 +13558,7 @@ define <8 x i64> @ugt_44_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13534,14 +13574,14 @@ define <8 x i64> @ugt_44_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_44_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13550,7 +13590,7 @@ define <8 x i64> @ugt_44_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13581,7 +13621,7 @@ define <8 x i64> @ult_45_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13597,14 +13637,14 @@ define <8 x i64> @ult_45_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_45_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13613,7 +13653,7 @@ define <8 x i64> @ult_45_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13644,7 +13684,7 @@ define <8 x i64> @ugt_45_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13660,14 +13700,14 @@ define <8 x i64> @ugt_45_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_45_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13676,7 +13716,7 @@ define <8 x i64> @ugt_45_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13707,7 +13747,7 @@ define <8 x i64> @ult_46_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13723,14 +13763,14 @@ define <8 x i64> @ult_46_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_46_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13739,7 +13779,7 @@ define <8 x i64> @ult_46_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13770,7 +13810,7 @@ define <8 x i64> @ugt_46_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13786,14 +13826,14 @@ define <8 x i64> @ugt_46_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_46_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13802,7 +13842,7 @@ define <8 x i64> @ugt_46_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13833,7 +13873,7 @@ define <8 x i64> @ult_47_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13849,14 +13889,14 @@ define <8 x i64> @ult_47_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_47_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13865,7 +13905,7 @@ define <8 x i64> @ult_47_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13896,7 +13936,7 @@ define <8 x i64> @ugt_47_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13912,14 +13952,14 @@ define <8 x i64> @ugt_47_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_47_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13928,7 +13968,7 @@ define <8 x i64> @ugt_47_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -13959,7 +13999,7 @@ define <8 x i64> @ult_48_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -13975,14 +14015,14 @@ define <8 x i64> @ult_48_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_48_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -13991,7 +14031,7 @@ define <8 x i64> @ult_48_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14022,7 +14062,7 @@ define <8 x i64> @ugt_48_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14038,14 +14078,14 @@ define <8 x i64> @ugt_48_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_48_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14054,7 +14094,7 @@ define <8 x i64> @ugt_48_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14085,7 +14125,7 @@ define <8 x i64> @ult_49_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14101,14 +14141,14 @@ define <8 x i64> @ult_49_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_49_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14117,7 +14157,7 @@ define <8 x i64> @ult_49_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14148,7 +14188,7 @@ define <8 x i64> @ugt_49_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14164,14 +14204,14 @@ define <8 x i64> @ugt_49_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_49_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14180,7 +14220,7 @@ define <8 x i64> @ugt_49_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14211,7 +14251,7 @@ define <8 x i64> @ult_50_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14227,14 +14267,14 @@ define <8 x i64> @ult_50_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_50_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14243,7 +14283,7 @@ define <8 x i64> @ult_50_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14274,7 +14314,7 @@ define <8 x i64> @ugt_50_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14290,14 +14330,14 @@ define <8 x i64> @ugt_50_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_50_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14306,7 +14346,7 @@ define <8 x i64> @ugt_50_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14337,7 +14377,7 @@ define <8 x i64> @ult_51_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14353,14 +14393,14 @@ define <8 x i64> @ult_51_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_51_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14369,7 +14409,7 @@ define <8 x i64> @ult_51_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14400,7 +14440,7 @@ define <8 x i64> @ugt_51_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14416,14 +14456,14 @@ define <8 x i64> @ugt_51_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_51_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14432,7 +14472,7 @@ define <8 x i64> @ugt_51_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14463,7 +14503,7 @@ define <8 x i64> @ult_52_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14479,14 +14519,14 @@ define <8 x i64> @ult_52_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_52_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14495,7 +14535,7 @@ define <8 x i64> @ult_52_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14526,7 +14566,7 @@ define <8 x i64> @ugt_52_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14542,14 +14582,14 @@ define <8 x i64> @ugt_52_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_52_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14558,7 +14598,7 @@ define <8 x i64> @ugt_52_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14589,7 +14629,7 @@ define <8 x i64> @ult_53_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14605,14 +14645,14 @@ define <8 x i64> @ult_53_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_53_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14621,7 +14661,7 @@ define <8 x i64> @ult_53_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14652,7 +14692,7 @@ define <8 x i64> @ugt_53_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14668,14 +14708,14 @@ define <8 x i64> @ugt_53_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_53_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14684,7 +14724,7 @@ define <8 x i64> @ugt_53_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14715,7 +14755,7 @@ define <8 x i64> @ult_54_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14731,14 +14771,14 @@ define <8 x i64> @ult_54_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_54_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14747,7 +14787,7 @@ define <8 x i64> @ult_54_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14778,7 +14818,7 @@ define <8 x i64> @ugt_54_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14794,14 +14834,14 @@ define <8 x i64> @ugt_54_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_54_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14810,7 +14850,7 @@ define <8 x i64> @ugt_54_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14841,7 +14881,7 @@ define <8 x i64> @ult_55_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14857,14 +14897,14 @@ define <8 x i64> @ult_55_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_55_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14873,7 +14913,7 @@ define <8 x i64> @ult_55_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14904,7 +14944,7 @@ define <8 x i64> @ugt_55_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14920,14 +14960,14 @@ define <8 x i64> @ugt_55_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_55_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14936,7 +14976,7 @@ define <8 x i64> @ugt_55_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -14967,7 +15007,7 @@ define <8 x i64> @ult_56_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -14983,14 +15023,14 @@ define <8 x i64> @ult_56_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_56_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -14999,7 +15039,7 @@ define <8 x i64> @ult_56_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15030,7 +15070,7 @@ define <8 x i64> @ugt_56_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15046,14 +15086,14 @@ define <8 x i64> @ugt_56_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_56_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15062,7 +15102,7 @@ define <8 x i64> @ugt_56_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15093,7 +15133,7 @@ define <8 x i64> @ult_57_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15109,14 +15149,14 @@ define <8 x i64> @ult_57_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_57_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15125,7 +15165,7 @@ define <8 x i64> @ult_57_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15156,7 +15196,7 @@ define <8 x i64> @ugt_57_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15172,14 +15212,14 @@ define <8 x i64> @ugt_57_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_57_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15188,7 +15228,7 @@ define <8 x i64> @ugt_57_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15219,7 +15259,7 @@ define <8 x i64> @ult_58_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15235,14 +15275,14 @@ define <8 x i64> @ult_58_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_58_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15251,7 +15291,7 @@ define <8 x i64> @ult_58_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15282,7 +15322,7 @@ define <8 x i64> @ugt_58_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15298,14 +15338,14 @@ define <8 x i64> @ugt_58_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_58_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15314,7 +15354,7 @@ define <8 x i64> @ugt_58_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15345,7 +15385,7 @@ define <8 x i64> @ult_59_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15361,14 +15401,14 @@ define <8 x i64> @ult_59_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_59_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15377,7 +15417,7 @@ define <8 x i64> @ult_59_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15408,7 +15448,7 @@ define <8 x i64> @ugt_59_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15424,14 +15464,14 @@ define <8 x i64> @ugt_59_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_59_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15440,7 +15480,7 @@ define <8 x i64> @ugt_59_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15471,7 +15511,7 @@ define <8 x i64> @ult_60_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15487,14 +15527,14 @@ define <8 x i64> @ult_60_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_60_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15503,7 +15543,7 @@ define <8 x i64> @ult_60_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15534,7 +15574,7 @@ define <8 x i64> @ugt_60_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15550,14 +15590,14 @@ define <8 x i64> @ugt_60_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_60_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15566,7 +15606,7 @@ define <8 x i64> @ugt_60_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15597,7 +15637,7 @@ define <8 x i64> @ult_61_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15613,14 +15653,14 @@ define <8 x i64> @ult_61_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_61_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15629,7 +15669,7 @@ define <8 x i64> @ult_61_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15660,7 +15700,7 @@ define <8 x i64> @ugt_61_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15676,14 +15716,14 @@ define <8 x i64> @ugt_61_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_61_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15692,7 +15732,7 @@ define <8 x i64> @ugt_61_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15723,7 +15763,7 @@ define <8 x i64> @ult_62_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15739,14 +15779,14 @@ define <8 x i64> @ult_62_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_62_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15755,7 +15795,7 @@ define <8 x i64> @ult_62_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15786,7 +15826,7 @@ define <8 x i64> @ugt_62_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15802,14 +15842,14 @@ define <8 x i64> @ugt_62_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_62_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15818,7 +15858,7 @@ define <8 x i64> @ugt_62_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)
@@ -15849,7 +15889,7 @@ define <8 x i64> @ult_63_v8i64(<8 x i64> %0) {
 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512F-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    retq
 ;
@@ -15865,14 +15905,14 @@ define <8 x i64> @ult_63_v8i64(<8 x i64> %0) {
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ult_63_v8i64:
 ; AVX512VPOPCNTDQ:       # %bb.0:
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
-; AVX512VPOPCNTDQ-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; AVX512VPOPCNTDQ-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VPOPCNTDQ-NEXT:    retq
 ;
@@ -15881,7 +15921,7 @@ define <8 x i64> @ult_63_v8i64(<8 x i64> %0) {
 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
-; BITALG-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
+; BITALG-NEXT:    vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
 ; BITALG-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; BITALG-NEXT:    retq
   %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0)