[llvm] da570ef - [DAG] Match select(icmp(x,y),sub(x,y),sub(y,x)) -> abd(x,y) patterns

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 14 08:10:45 PDT 2023


Author: Simon Pilgrim
Date: 2023-03-14T15:10:30Z
New Revision: da570ef1b4f856603970ecb14299947fb6cd678a

URL: https://github.com/llvm/llvm-project/commit/da570ef1b4f856603970ecb14299947fb6cd678a
DIFF: https://github.com/llvm/llvm-project/commit/da570ef1b4f856603970ecb14299947fb6cd678a.diff

LOG: [DAG] Match select(icmp(x,y),sub(x,y),sub(y,x)) -> abd(x,y) patterns

Pulled out of PowerPC, and added ABDS support as well (hence the additional v4i32 PPC matches)

Differential Revision: https://reviews.llvm.org/D144789
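
For reference, the IR shape this combine recognizes is the same one exercised by
the abd-combine.ll and abd_cmp_* tests below; a minimal standalone sketch (the
function name is illustrative):

  define <8 x i16> @abdu_example(<8 x i16> %a, <8 x i16> %b) {
    %cmp = icmp ugt <8 x i16> %a, %b
    %ab  = sub <8 x i16> %a, %b
    %ba  = sub <8 x i16> %b, %a
    ; selects whichever subtraction is non-negative, i.e. the absolute difference
    %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
    ret <8 x i16> %sel
  }

With this patch the generic DAG combiner folds the vselect/sub pair directly to
ISD::ABDU (or ISD::ABDS for signed predicates) whenever the target supports the
corresponding ABD node, e.g. a single uabd/sabd on AArch64 as the updated CHECK
lines show.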

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/test/CodeGen/AArch64/abd-combine.ll
    llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
    llvm/test/CodeGen/X86/abds-vector-128.ll
    llvm/test/CodeGen/X86/abds-vector-256.ll
    llvm/test/CodeGen/X86/abds-vector-512.ll
    llvm/test/CodeGen/X86/abdu-vector-128.ll
    llvm/test/CodeGen/X86/abdu-vector-256.ll
    llvm/test/CodeGen/X86/abdu-vector-512.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4463873769bbe..7ab43845d5c75 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11840,6 +11840,38 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
       }
     }
 
+    // Match VSELECTs with absolute difference patterns.
+    // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
+    // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
+    // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
+    // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
+    if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
+        N1.getOperand(0) == N2.getOperand(1) &&
+        N1.getOperand(1) == N2.getOperand(0)) {
+      bool IsSigned = isSignedIntSetCC(CC);
+      unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
+      if (hasOperation(ABDOpc, VT)) {
+        switch (CC) {
+        case ISD::SETGT:
+        case ISD::SETGE:
+        case ISD::SETUGT:
+        case ISD::SETUGE:
+          if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
+            return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+          break;
+        case ISD::SETLT:
+        case ISD::SETLE:
+        case ISD::SETULT:
+        case ISD::SETULE:
+          if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
+            return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+          break;
+        default:
+          break;
+        }
+      }
+    }
+
     // Match VSELECTs into add with unsigned saturation.
     if (hasOperation(ISD::UADDSAT, VT)) {
       // Check if one of the arms of the VSELECT is vector with all bits set.

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4011aaff50a0a..03a387570e3c6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1390,10 +1390,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
   }
 
-  if (Subtarget.hasP9Altivec()) {
-    setTargetDAGCombine({ISD::VSELECT});
-  }
-
   setLibcallName(RTLIB::LOG_F128, "logf128");
   setLibcallName(RTLIB::LOG2_F128, "log2f128");
   setLibcallName(RTLIB::LOG10_F128, "log10f128");
@@ -16078,8 +16074,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   }
   case ISD::BUILD_VECTOR:
     return DAGCombineBuildVector(N, DCI);
-  case ISD::VSELECT:
-    return combineVSelect(N, DCI);
   }
 
   return SDValue();
@@ -17705,69 +17699,6 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
   return true;
 }
 
-// For type v4i32/v8ii16/v16i8, transform
-// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (abdu a, b)
-// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (abdu a, b)
-// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (abdu a, b)
-// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (abdu a, b)
-// TODO: Move this to DAGCombiner?
-SDValue PPCTargetLowering::combineVSelect(SDNode *N,
-                                          DAGCombinerInfo &DCI) const {
-  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
-  assert(Subtarget.hasP9Altivec() &&
-         "Only combine this when P9 altivec supported!");
-
-  SelectionDAG &DAG = DCI.DAG;
-  SDLoc dl(N);
-  SDValue Cond = N->getOperand(0);
-  SDValue TrueOpnd = N->getOperand(1);
-  SDValue FalseOpnd = N->getOperand(2);
-  EVT VT = N->getOperand(1).getValueType();
-
-  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
-      FalseOpnd.getOpcode() != ISD::SUB)
-    return SDValue();
-
-  // ABSD only available for type v4i32/v8i16/v16i8
-  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
-    return SDValue();
-
-  // At least to save one more dependent computation
-  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
-    return SDValue();
-
-  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
-  // Can only handle unsigned comparison here
-  switch (CC) {
-  default:
-    return SDValue();
-  case ISD::SETUGT:
-  case ISD::SETUGE:
-    break;
-  case ISD::SETULT:
-  case ISD::SETULE:
-    std::swap(TrueOpnd, FalseOpnd);
-    break;
-  }
-
-  SDValue CmpOpnd1 = Cond.getOperand(0);
-  SDValue CmpOpnd2 = Cond.getOperand(1);
-
-  // SETCC CmpOpnd1 CmpOpnd2 cond
-  // TrueOpnd = CmpOpnd1 - CmpOpnd2
-  // FalseOpnd = CmpOpnd2 - CmpOpnd1
-  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
-      TrueOpnd.getOperand(1) == CmpOpnd2 &&
-      FalseOpnd.getOperand(0) == CmpOpnd2 &&
-      FalseOpnd.getOperand(1) == CmpOpnd1) {
-    return DAG.getNode(ISD::ABDU, dl, N->getOperand(1).getValueType(), CmpOpnd1,
-                       CmpOpnd2, DAG.getTargetConstant(0, dl, MVT::i32));
-  }
-
-  return SDValue();
-}
-
 /// getAddrModeForFlags - Based on the set of address flags, select the most
 /// optimal instruction format to match by.
 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 21fad5a12a8fe..9706fddb0fc0b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1416,7 +1416,6 @@ namespace llvm {
     SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
-    SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                  SelectionDAG &DAG) const;
     SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,

diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index dad3fd08a7b9a..a7e0c26fd7a15 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -107,10 +107,7 @@ define <8 x i16> @abdu_undef(<8 x i16> %src1) {
 define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
 ; CHECK-LABEL: abdu_ugt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmhi v2.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %3 = icmp ugt <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
@@ -122,10 +119,7 @@ define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
 define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
 ; CHECK-LABEL: abdu_uge:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmhs v2.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %3 = icmp uge <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
@@ -137,10 +131,7 @@ define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
 define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
 ; CHECK-LABEL: abdu_ult:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmhi v2.8h, v1.8h, v0.8h
-; CHECK-NEXT:    sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %3 = icmp ult <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
@@ -152,10 +143,7 @@ define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
 define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
 ; CHECK-LABEL: abdu_ule:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmhs v2.8h, v1.8h, v0.8h
-; CHECK-NEXT:    sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %3 = icmp ule <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
@@ -167,10 +155,7 @@ define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
 define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
 ; CHECK-LABEL: abds_sgt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmgt v2.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %3 = icmp sgt <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
@@ -182,10 +167,7 @@ define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
 define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
 ; CHECK-LABEL: abds_sge:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmge v2.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %3 = icmp sge <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
@@ -197,10 +179,7 @@ define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
 define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
 ; CHECK-LABEL: abds_slt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmgt v2.8h, v1.8h, v0.8h
-; CHECK-NEXT:    sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %3 = icmp slt <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
@@ -212,10 +191,7 @@ define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
 define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
 ; CHECK-LABEL: abds_sle:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmge v2.8h, v1.8h, v0.8h
-; CHECK-NEXT:    sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %3 = icmp sle <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1

diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 9e9271ed7c5d7..342a9044b9bcc 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -1834,13 +1834,20 @@ define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) {
 ; Tests for ABDS icmp + sub + select sequence
 
 define <4 x i32> @absd_int32_sgt(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_sgt:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpgtsw v4, v2, v3
-; CHECK-NEXT:    vsubuwm v5, v2, v3
-; CHECK-NEXT:    vsubuwm v2, v3, v2
-; CHECK-NEXT:    xxsel v2, v2, v5, v4
-; CHECK-NEXT:    blr
+; CHECK-PWR9-LABEL: absd_int32_sgt:
+; CHECK-PWR9:       # %bb.0:
+; CHECK-PWR9-NEXT:    xvnegsp v3, v3
+; CHECK-PWR9-NEXT:    xvnegsp v2, v2
+; CHECK-PWR9-NEXT:    vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT:    blr
+;
+; CHECK-PWR78-LABEL: absd_int32_sgt:
+; CHECK-PWR78:       # %bb.0:
+; CHECK-PWR78-NEXT:    vcmpgtsw v4, v2, v3
+; CHECK-PWR78-NEXT:    vsubuwm v5, v2, v3
+; CHECK-PWR78-NEXT:    vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT:    xxsel v2, v2, v5, v4
+; CHECK-PWR78-NEXT:    blr
   %3 = icmp sgt <4 x i32> %0, %1
   %4 = sub <4 x i32> %0, %1
   %5 = sub <4 x i32> %1, %0
@@ -1849,14 +1856,21 @@ define <4 x i32> @absd_int32_sgt(<4 x i32>, <4 x i32>) {
 }
 
 define <4 x i32> @absd_int32_sge(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_sge:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpgtsw v4, v3, v2
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    vsubuwm v4, v2, v3
-; CHECK-NEXT:    vsubuwm v2, v3, v2
-; CHECK-NEXT:    xxsel v2, v2, v4, vs0
-; CHECK-NEXT:    blr
+; CHECK-PWR9-LABEL: absd_int32_sge:
+; CHECK-PWR9:       # %bb.0:
+; CHECK-PWR9-NEXT:    xvnegsp v3, v3
+; CHECK-PWR9-NEXT:    xvnegsp v2, v2
+; CHECK-PWR9-NEXT:    vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT:    blr
+;
+; CHECK-PWR78-LABEL: absd_int32_sge:
+; CHECK-PWR78:       # %bb.0:
+; CHECK-PWR78-NEXT:    vcmpgtsw v4, v3, v2
+; CHECK-PWR78-NEXT:    xxlnor vs0, v4, v4
+; CHECK-PWR78-NEXT:    vsubuwm v4, v2, v3
+; CHECK-PWR78-NEXT:    vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT:    xxsel v2, v2, v4, vs0
+; CHECK-PWR78-NEXT:    blr
   %3 = icmp sge <4 x i32> %0, %1
   %4 = sub <4 x i32> %0, %1
   %5 = sub <4 x i32> %1, %0
@@ -1865,13 +1879,20 @@ define <4 x i32> @absd_int32_sge(<4 x i32>, <4 x i32>) {
 }
 
 define <4 x i32> @absd_int32_slt(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_slt:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpgtsw v4, v3, v2
-; CHECK-NEXT:    vsubuwm v5, v2, v3
-; CHECK-NEXT:    vsubuwm v2, v3, v2
-; CHECK-NEXT:    xxsel v2, v5, v2, v4
-; CHECK-NEXT:    blr
+; CHECK-PWR9-LABEL: absd_int32_slt:
+; CHECK-PWR9:       # %bb.0:
+; CHECK-PWR9-NEXT:    xvnegsp v3, v3
+; CHECK-PWR9-NEXT:    xvnegsp v2, v2
+; CHECK-PWR9-NEXT:    vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT:    blr
+;
+; CHECK-PWR78-LABEL: absd_int32_slt:
+; CHECK-PWR78:       # %bb.0:
+; CHECK-PWR78-NEXT:    vcmpgtsw v4, v3, v2
+; CHECK-PWR78-NEXT:    vsubuwm v5, v2, v3
+; CHECK-PWR78-NEXT:    vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT:    xxsel v2, v5, v2, v4
+; CHECK-PWR78-NEXT:    blr
   %3 = icmp slt <4 x i32> %0, %1
   %4 = sub <4 x i32> %0, %1
   %5 = sub <4 x i32> %1, %0
@@ -1880,14 +1901,21 @@ define <4 x i32> @absd_int32_slt(<4 x i32>, <4 x i32>) {
 }
 
 define <4 x i32> @absd_int32_sle(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_sle:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpgtsw v4, v2, v3
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    vsubuwm v4, v2, v3
-; CHECK-NEXT:    vsubuwm v2, v3, v2
-; CHECK-NEXT:    xxsel v2, v4, v2, vs0
-; CHECK-NEXT:    blr
+; CHECK-PWR9-LABEL: absd_int32_sle:
+; CHECK-PWR9:       # %bb.0:
+; CHECK-PWR9-NEXT:    xvnegsp v3, v3
+; CHECK-PWR9-NEXT:    xvnegsp v2, v2
+; CHECK-PWR9-NEXT:    vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT:    blr
+;
+; CHECK-PWR78-LABEL: absd_int32_sle:
+; CHECK-PWR78:       # %bb.0:
+; CHECK-PWR78-NEXT:    vcmpgtsw v4, v2, v3
+; CHECK-PWR78-NEXT:    xxlnor vs0, v4, v4
+; CHECK-PWR78-NEXT:    vsubuwm v4, v2, v3
+; CHECK-PWR78-NEXT:    vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT:    xxsel v2, v4, v2, vs0
+; CHECK-PWR78-NEXT:    blr
   %3 = icmp sle <4 x i32> %0, %1
   %4 = sub <4 x i32> %0, %1
   %5 = sub <4 x i32> %1, %0

diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll
index 6533e2cc20b3d..a48781c6ebf8e 100644
--- a/llvm/test/CodeGen/X86/abds-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-128.ll
@@ -799,38 +799,17 @@ define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; SSE42-LABEL: abd_cmp_v16i8:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    pcmpgtb %xmm1, %xmm2
-; SSE42-NEXT:    movdqa %xmm0, %xmm3
-; SSE42-NEXT:    psubb %xmm1, %xmm3
-; SSE42-NEXT:    psubb %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT:    movdqa %xmm1, %xmm0
+; SSE42-NEXT:    pminsb %xmm1, %xmm2
+; SSE42-NEXT:    pmaxsb %xmm1, %xmm0
+; SSE42-NEXT:    psubb %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX1-LABEL: abd_cmp_v16i8:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpsubb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: abd_cmp_v16i8:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpsubb %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vpsubb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: abd_cmp_v16i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpsubb %xmm0, %xmm1, %xmm3
-; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: abd_cmp_v16i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %cmp = icmp sgt <16 x i8> %a, %b
   %ab = sub <16 x i8> %a, %b
   %ba = sub <16 x i8> %b, %a
@@ -839,54 +818,20 @@ define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 }
 
 define <8 x i16> @abd_cmp_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
-; SSE2-LABEL: abd_cmp_v8i16:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtw %xmm0, %xmm2
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    psubw %xmm1, %xmm3
-; SSE2-NEXT:    psubw %xmm0, %xmm1
-; SSE2-NEXT:    pand %xmm2, %xmm1
-; SSE2-NEXT:    pandn %xmm3, %xmm2
-; SSE2-NEXT:    por %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE42-LABEL: abd_cmp_v8i16:
-; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm1, %xmm2
-; SSE42-NEXT:    pcmpgtw %xmm0, %xmm2
-; SSE42-NEXT:    movdqa %xmm0, %xmm3
-; SSE42-NEXT:    psubw %xmm1, %xmm3
-; SSE42-NEXT:    psubw %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
-; SSE42-NEXT:    movdqa %xmm3, %xmm0
-; SSE42-NEXT:    retq
-;
-; AVX1-LABEL: abd_cmp_v8i16:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: abd_cmp_v8i16:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm2
-; AVX2-NEXT:    vpsubw %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
-; AVX2-NEXT:    retq
+; SSE-LABEL: abd_cmp_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pminsw %xmm1, %xmm2
+; SSE-NEXT:    pmaxsw %xmm1, %xmm0
+; SSE-NEXT:    psubw %xmm2, %xmm0
+; SSE-NEXT:    retq
 ;
-; AVX512-LABEL: abd_cmp_v8i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpsubw %xmm0, %xmm1, %xmm3
-; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: abd_cmp_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %cmp = icmp sge <8 x i16> %a, %b
   %ab = sub <8 x i16> %a, %b
   %ba = sub <8 x i16> %b, %a
@@ -910,39 +855,18 @@ define <4 x i32> @abd_cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ;
 ; SSE42-LABEL: abd_cmp_v4i32:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm1, %xmm2
-; SSE42-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE42-NEXT:    movdqa %xmm0, %xmm3
-; SSE42-NEXT:    psubd %xmm1, %xmm3
-; SSE42-NEXT:    psubd %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    blendvps %xmm0, %xmm1, %xmm3
-; SSE42-NEXT:    movaps %xmm3, %xmm0
+; SSE42-NEXT:    movdqa %xmm0, %xmm2
+; SSE42-NEXT:    pminsd %xmm1, %xmm2
+; SSE42-NEXT:    pmaxsd %xmm1, %xmm0
+; SSE42-NEXT:    psubd %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX1-LABEL: abd_cmp_v4i32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: abd_cmp_v4i32:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm2
-; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm3, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: abd_cmp_v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpsubd %xmm0, %xmm1, %xmm2 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: abd_cmp_v4i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %cmp = icmp slt <4 x i32> %a, %b
   %ab = sub <4 x i32> %a, %b
   %ba = sub <4 x i32> %b, %a
@@ -976,38 +900,37 @@ define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ;
 ; SSE42-LABEL: abd_cmp_v2i64:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm1, %xmm2
-; SSE42-NEXT:    pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT:    movdqa %xmm0, %xmm2
+; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
 ; SSE42-NEXT:    movdqa %xmm0, %xmm3
 ; SSE42-NEXT:    psubq %xmm1, %xmm3
 ; SSE42-NEXT:    psubq %xmm0, %xmm1
 ; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; SSE42-NEXT:    movapd %xmm3, %xmm0
+; SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
+; SSE42-NEXT:    movapd %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: abd_cmp_v2i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
 ; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
 ; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
 ; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
 ; AVX2-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v2i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpnltq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpsubq %xmm0, %xmm1, %xmm2
-; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm2 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
+; AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %cmp = icmp sge <2 x i64> %a, %b
   %ab = sub <2 x i64> %a, %b
@@ -1174,44 +1097,47 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
 ; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    psubq %xmm1, %xmm2
-; SSE42-NEXT:    movdqa %xmm1, %xmm3
-; SSE42-NEXT:    psubq %xmm0, %xmm3
-; SSE42-NEXT:    pcmpgtq %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm1, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm2
+; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT:    movdqa %xmm2, %xmm3
+; SSE42-NEXT:    psubq %xmm1, %xmm3
+; SSE42-NEXT:    movdqa %xmm1, %xmm4
+; SSE42-NEXT:    psubq %xmm2, %xmm4
+; SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm4
+; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
 ; SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE42-NEXT:    pxor %xmm1, %xmm0
-; SSE42-NEXT:    paddq %xmm2, %xmm0
+; SSE42-NEXT:    paddq %xmm4, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm4
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm3, %xmm4, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpsubq %xmm0, %xmm1, %xmm3
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
+; AVX2-NEXT:    vpsubq %xmm0, %xmm1, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm3, %xmm4, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm1
-; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpnltq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpsubq %xmm0, %xmm1, %xmm2
-; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
 ; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
@@ -1226,60 +1152,25 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
 }
 
 define <8 x i16> @abd_cmp_v8i16_multiuse_sub(<8 x i16> %a, <8 x i16> %b) nounwind {
-; SSE2-LABEL: abd_cmp_v8i16_multiuse_sub:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    pcmpgtw %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    psubw %xmm1, %xmm3
-; SSE2-NEXT:    psubw %xmm0, %xmm1
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm0, %xmm2
-; SSE2-NEXT:    paddw %xmm3, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE42-LABEL: abd_cmp_v8i16_multiuse_sub:
-; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    pcmpgtw %xmm1, %xmm2
-; SSE42-NEXT:    movdqa %xmm0, %xmm3
-; SSE42-NEXT:    psubw %xmm1, %xmm3
-; SSE42-NEXT:    psubw %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT:    paddw %xmm1, %xmm3
-; SSE42-NEXT:    movdqa %xmm3, %xmm0
-; SSE42-NEXT:    retq
-;
-; AVX1-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpaddw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpsubw %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpaddw %xmm0, %xmm3, %xmm0
-; AVX2-NEXT:    retq
+; SSE-LABEL: abd_cmp_v8i16_multiuse_sub:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    psubw %xmm1, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pminsw %xmm1, %xmm3
+; SSE-NEXT:    pmaxsw %xmm1, %xmm0
+; SSE-NEXT:    psubw %xmm3, %xmm0
+; SSE-NEXT:    paddw %xmm2, %xmm0
+; SSE-NEXT:    retq
 ;
-; AVX512-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpsubw %xmm0, %xmm1, %xmm3
-; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm3
+; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
+; AVX-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    retq
   %cmp = icmp sgt <8 x i16> %a, %b
   %ab = sub <8 x i16> %a, %b
   %ba = sub <8 x i16> %b, %a

diff --git a/llvm/test/CodeGen/X86/abds-vector-256.ll b/llvm/test/CodeGen/X86/abds-vector-256.ll
index b9bd875cee76e..78190d2cb7d8b 100644
--- a/llvm/test/CodeGen/X86/abds-vector-256.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-256.ll
@@ -441,34 +441,27 @@ define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm4
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm5, %ymm4
-; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm5
-; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm4
+; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vandnps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT:    vandps %ymm4, %ymm5, %ymm1
-; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v32i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm2
-; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm3
-; AVX2-NEXT:    vpsubb %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
+; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v32i8:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubb %ymm1, %ymm0, %ymm2
-; AVX512-NEXT:    vpsubb %ymm0, %ymm1, %ymm3
-; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
+; AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
+; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %cmp = icmp sgt <32 x i8> %a, %b
   %ab = sub <32 x i8> %a, %b
@@ -480,36 +473,29 @@ define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX1-LABEL: abd_cmp_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm4
-; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm5, %ymm4
-; AVX1-NEXT:    vpsubw %xmm3, %xmm2, %xmm5
-; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm4
+; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vandps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT:    vandnps %ymm5, %ymm4, %ymm1
-; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm2
-; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm3
-; AVX2-NEXT:    vpsubw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
+; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubw %ymm1, %ymm0, %ymm2
-; AVX512-NEXT:    vpsubw %ymm0, %ymm1, %ymm3
-; AVX512-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; AVX512-NEXT:    vpblendvb %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
+; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %cmp = icmp sge <16 x i16> %a, %b
   %ab = sub <16 x i16> %a, %b
@@ -521,33 +507,29 @@ define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: abd_cmp_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm5
-; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm6
-; AVX1-NEXT:    vpsubd %xmm4, %xmm3, %xmm7
-; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm6, %xmm0
-; AVX1-NEXT:    vpsubd %xmm3, %xmm4, %xmm1
-; AVX1-NEXT:    vblendvps %xmm5, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm4
+; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v8i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm2
-; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm3
-; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
+; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v8i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm2
-; AVX512-NEXT:    vpsubd %ymm0, %ymm1, %ymm2 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
+; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %cmp = icmp slt <8 x i32> %a, %b
   %ab = sub <8 x i32> %a, %b
@@ -559,33 +541,32 @@ define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 define <4 x i64> @abd_cmp_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX1-LABEL: abd_cmp_v4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
-; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm6
-; AVX1-NEXT:    vpsubq %xmm4, %xmm3, %xmm7
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
+; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm5
+; AVX1-NEXT:    vpsubq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm4
 ; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm6, %xmm0
-; AVX1-NEXT:    vpsubq %xmm3, %xmm4, %xmm1
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvpd %xmm3, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
 ; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm3
 ; AVX2-NEXT:    vpsubq %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm3, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v4i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpnltq %ymm1, %ymm0, %k1
-; AVX512-NEXT:    vpsubq %ymm0, %ymm1, %ymm2
-; AVX512-NEXT:    vpsubq %ymm1, %ymm0, %ymm2 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm2
+; AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %cmp = icmp sge <4 x i64> %a, %b
   %ab = sub <4 x i64> %a, %b

diff --git a/llvm/test/CodeGen/X86/abds-vector-512.ll b/llvm/test/CodeGen/X86/abds-vector-512.ll
index 65daad55c5cd8..359b962a152b2 100644
--- a/llvm/test/CodeGen/X86/abds-vector-512.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-512.ll
@@ -267,26 +267,22 @@ define <8 x i64> @abd_minmax_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
 define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW-LABEL: abd_cmp_v64i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k1
-; AVX512BW-NEXT:    vpsubb %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: abd_cmp_v64i8:
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
-; AVX512DQ-NEXT:    vpcmpgtb %ymm2, %ymm3, %ymm4
-; AVX512DQ-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm5
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512DQ-NEXT:    vpsubb %ymm2, %ymm3, %ymm5
-; AVX512DQ-NEXT:    vpsubb %ymm1, %ymm0, %ymm6
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm5, %zmm6, %zmm5
-; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpsubb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT:    vpminsb %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT:    vpmaxsb %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpminsb %ymm1, %ymm0, %ymm3
+; AVX512DQ-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpternlogq $184, %zmm5, %zmm4, %zmm0
 ; AVX512DQ-NEXT:    retq
   %cmp = icmp sgt <64 x i8> %a, %b
   %ab = sub <64 x i8> %a, %b
@@ -298,26 +294,22 @@ define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512BW-LABEL: abd_cmp_v32i16:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpcmpnltw %zmm1, %zmm0, %k1
-; AVX512BW-NEXT:    vpsubw %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: abd_cmp_v32i16:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
-; AVX512DQ-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm4
-; AVX512DQ-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm5
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm2, %ymm5
-; AVX512DQ-NEXT:    vpsubw %ymm1, %ymm0, %ymm6
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm5, %zmm6, %zmm5
-; AVX512DQ-NEXT:    vpsubw %ymm2, %ymm3, %ymm2
-; AVX512DQ-NEXT:    vpsubw %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512DQ-NEXT:    vpminsw %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT:    vpmaxsw %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpminsw %ymm1, %ymm0, %ymm3
+; AVX512DQ-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm0
 ; AVX512DQ-NEXT:    retq
   %cmp = icmp sge <32 x i16> %a, %b
   %ab = sub <32 x i16> %a, %b
@@ -329,10 +321,9 @@ define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
 ; AVX512-LABEL: abd_cmp_v16i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
-; AVX512-NEXT:    vpsubd %zmm1, %zmm0, %zmm2
-; AVX512-NEXT:    vpsubd %zmm0, %zmm1, %zmm2 {%k1}
-; AVX512-NEXT:    vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %cmp = icmp slt <16 x i32> %a, %b
   %ab = sub <16 x i32> %a, %b
@@ -344,10 +335,9 @@ define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
 define <8 x i64> @abd_cmp_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
 ; AVX512-LABEL: abd_cmp_v8i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpnltq %zmm1, %zmm0, %k1
-; AVX512-NEXT:    vpsubq %zmm0, %zmm1, %zmm2
-; AVX512-NEXT:    vpsubq %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512-NEXT:    vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %cmp = icmp sge <8 x i64> %a, %b
   %ab = sub <8 x i64> %a, %b

diff --git a/llvm/test/CodeGen/X86/abdu-vector-128.ll b/llvm/test/CodeGen/X86/abdu-vector-128.ll
index 910fe49d33a0e..88496032aa307 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-128.ll
@@ -641,59 +641,20 @@ define <2 x i64> @abd_minmax_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ;
 
 define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
-; SSE2-LABEL: abd_cmp_v16i8:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    pminub %xmm1, %xmm2
-; SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    psubb %xmm1, %xmm3
-; SSE2-NEXT:    psubb %xmm0, %xmm1
-; SSE2-NEXT:    pand %xmm2, %xmm1
-; SSE2-NEXT:    pandn %xmm3, %xmm2
-; SSE2-NEXT:    por %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE42-LABEL: abd_cmp_v16i8:
-; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    pminub %xmm1, %xmm2
-; SSE42-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE42-NEXT:    movdqa %xmm0, %xmm3
-; SSE42-NEXT:    psubb %xmm1, %xmm3
-; SSE42-NEXT:    psubb %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
-; SSE42-NEXT:    movdqa %xmm3, %xmm0
-; SSE42-NEXT:    retq
-;
-; AVX1-LABEL: abd_cmp_v16i8:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpsubb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: abd_cmp_v16i8:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX2-NEXT:    vpsubb %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vpsubb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
-; AVX2-NEXT:    retq
+; SSE-LABEL: abd_cmp_v16i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pminub %xmm1, %xmm2
+; SSE-NEXT:    pmaxub %xmm1, %xmm0
+; SSE-NEXT:    psubb %xmm2, %xmm0
+; SSE-NEXT:    retq
 ;
-; AVX512-LABEL: abd_cmp_v16i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpsubb %xmm0, %xmm1, %xmm3
-; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: abd_cmp_v16i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %cmp = icmp ugt <16 x i8> %a, %b
   %ab = sub <16 x i8> %a, %b
   %ba = sub <16 x i8> %b, %a
@@ -720,42 +681,17 @@ define <8 x i16> @abd_cmp_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; SSE42-LABEL: abd_cmp_v8i16:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    pmaxuw %xmm1, %xmm2
-; SSE42-NEXT:    pcmpeqw %xmm0, %xmm2
-; SSE42-NEXT:    movdqa %xmm0, %xmm3
-; SSE42-NEXT:    psubw %xmm1, %xmm3
-; SSE42-NEXT:    psubw %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT:    movdqa %xmm1, %xmm0
+; SSE42-NEXT:    pminuw %xmm1, %xmm2
+; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
+; SSE42-NEXT:    psubw %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX1-LABEL: abd_cmp_v8i16:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: abd_cmp_v8i16:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX2-NEXT:    vpsubw %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: abd_cmp_v8i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpsubw %xmm0, %xmm1, %xmm3
-; AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: abd_cmp_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %cmp = icmp uge <8 x i16> %a, %b
   %ab = sub <8 x i16> %a, %b
   %ba = sub <8 x i16> %b, %a
@@ -783,41 +719,17 @@ define <4 x i32> @abd_cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; SSE42-LABEL: abd_cmp_v4i32:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    pmaxud %xmm1, %xmm2
-; SSE42-NEXT:    pcmpeqd %xmm0, %xmm2
-; SSE42-NEXT:    movdqa %xmm0, %xmm3
-; SSE42-NEXT:    psubd %xmm1, %xmm3
-; SSE42-NEXT:    psubd %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    blendvps %xmm0, %xmm3, %xmm1
-; SSE42-NEXT:    movaps %xmm1, %xmm0
+; SSE42-NEXT:    pminud %xmm1, %xmm2
+; SSE42-NEXT:    pmaxud %xmm1, %xmm0
+; SSE42-NEXT:    psubd %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX1-LABEL: abd_cmp_v4i32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvps %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: abd_cmp_v4i32:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vblendvps %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: abd_cmp_v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpltud %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpsubd %xmm0, %xmm1, %xmm2 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: abd_cmp_v4i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %cmp = icmp ult <4 x i32> %a, %b
   %ab = sub <4 x i32> %a, %b
   %ba = sub <4 x i32> %b, %a
@@ -852,46 +764,45 @@ define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE42-LABEL: abd_cmp_v2i64:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; SSE42-NEXT:    movdqa %xmm0, %xmm3
+; SSE42-NEXT:    movdqa %xmm1, %xmm3
 ; SSE42-NEXT:    pxor %xmm2, %xmm3
-; SSE42-NEXT:    pxor %xmm1, %xmm2
+; SSE42-NEXT:    pxor %xmm0, %xmm2
 ; SSE42-NEXT:    pcmpgtq %xmm3, %xmm2
 ; SSE42-NEXT:    movdqa %xmm0, %xmm3
 ; SSE42-NEXT:    psubq %xmm1, %xmm3
 ; SSE42-NEXT:    psubq %xmm0, %xmm1
 ; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; SSE42-NEXT:    movapd %xmm3, %xmm0
+; SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
+; SSE42-NEXT:    movapd %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: abd_cmp_v2i64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
-; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
 ; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v2i64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
-; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
 ; AVX2-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v2i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpsubq %xmm0, %xmm1, %xmm2
-; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm2 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
+; AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %cmp = icmp uge <2 x i64> %a, %b
   %ab = sub <2 x i64> %a, %b
@@ -932,50 +843,52 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
 ;
 ; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    psubq %xmm1, %xmm2
+; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; SSE42-NEXT:    movdqa %xmm1, %xmm3
-; SSE42-NEXT:    psubq %xmm0, %xmm3
-; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; SSE42-NEXT:    pxor %xmm4, %xmm1
-; SSE42-NEXT:    pxor %xmm4, %xmm0
-; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
-; SSE42-NEXT:    paddq %xmm3, %xmm0
+; SSE42-NEXT:    pxor %xmm2, %xmm3
+; SSE42-NEXT:    pxor %xmm0, %xmm2
+; SSE42-NEXT:    pcmpgtq %xmm3, %xmm2
+; SSE42-NEXT:    movdqa %xmm0, %xmm3
+; SSE42-NEXT:    psubq %xmm1, %xmm3
+; SSE42-NEXT:    psubq %xmm0, %xmm1
+; SSE42-NEXT:    movdqa %xmm2, %xmm0
+; SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
+; SSE42-NEXT:    paddq %xmm1, %xmm2
+; SSE42-NEXT:    movdqa %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm3
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm1
-; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpsubq %xmm0, %xmm1, %xmm3
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT:    vpxor %xmm4, %xmm1, %xmm1
-; AVX2-NEXT:    vpxor %xmm4, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm1
-; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
+; AVX2-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpsubq %xmm0, %xmm1, %xmm2
-; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm2 {%k1}
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vpaddq %xmm0, %xmm2, %xmm2 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
+; AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    retq
   %cmp = icmp ugt <2 x i64> %a, %b
   %ab = sub <2 x i64> %a, %b
@@ -1007,46 +920,22 @@ define <8 x i16> @abd_cmp_v8i16_multiuse_sub(<8 x i16> %a, <8 x i16> %b) nounwin
 ; SSE42-LABEL: abd_cmp_v8i16_multiuse_sub:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    movdqa %xmm0, %xmm2
-; SSE42-NEXT:    pmaxuw %xmm1, %xmm2
-; SSE42-NEXT:    pcmpeqw %xmm0, %xmm2
+; SSE42-NEXT:    psubw %xmm1, %xmm2
 ; SSE42-NEXT:    movdqa %xmm0, %xmm3
-; SSE42-NEXT:    psubw %xmm1, %xmm3
-; SSE42-NEXT:    psubw %xmm0, %xmm1
-; SSE42-NEXT:    movdqa %xmm2, %xmm0
-; SSE42-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT:    paddw %xmm1, %xmm3
-; SSE42-NEXT:    movdqa %xmm3, %xmm0
+; SSE42-NEXT:    pminuw %xmm1, %xmm3
+; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
+; SSE42-NEXT:    psubw %xmm3, %xmm0
+; SSE42-NEXT:    paddw %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX1-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpaddw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX2-NEXT:    vpsubw %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpaddw %xmm0, %xmm3, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpsubw %xmm0, %xmm1, %xmm3
-; AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm3
+; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
+; AVX-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    retq
   %cmp = icmp uge <8 x i16> %a, %b
   %ab = sub <8 x i16> %a, %b
   %ba = sub <8 x i16> %b, %a

diff --git a/llvm/test/CodeGen/X86/abdu-vector-256.ll b/llvm/test/CodeGen/X86/abdu-vector-256.ll
index be6c7442bf0ac..3957133574ce5 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-256.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-256.ll
@@ -466,37 +466,26 @@ define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm4
-; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm5
-; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm0, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm5, %ymm4
-; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm5
-; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vandps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT:    vandnps %ymm5, %ymm4, %ymm1
-; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v32i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm2
-; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
-; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm3
-; AVX2-NEXT:    vpsubb %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v32i8:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubb %ymm1, %ymm0, %ymm2
-; AVX512-NEXT:    vpsubb %ymm0, %ymm1, %ymm3
-; AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpblendvb %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm2
+; AVX512-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %cmp = icmp ugt <32 x i8> %a, %b
   %ab = sub <32 x i8> %a, %b
@@ -510,38 +499,27 @@ define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm4
-; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm4
-; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm5
-; AVX1-NEXT:    vpcmpeqw %xmm5, %xmm0, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm5, %ymm4
-; AVX1-NEXT:    vpsubw %xmm2, %xmm3, %xmm5
-; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT:    vpsubw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm4
+; AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vandnps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT:    vandps %ymm4, %ymm5, %ymm1
-; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2
-; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
-; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm3
-; AVX2-NEXT:    vpsubw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm2
+; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubw %ymm1, %ymm0, %ymm2
-; AVX512-NEXT:    vpsubw %ymm0, %ymm1, %ymm3
-; AVX512-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
+; AVX512-NEXT:    vpminuw %ymm1, %ymm0, %ymm2
+; AVX512-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %cmp = icmp uge <16 x i16> %a, %b
   %ab = sub <16 x i16> %a, %b
@@ -553,36 +531,29 @@ define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: abd_cmp_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vpmaxud %xmm3, %xmm4, %xmm5
-; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm5
-; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm6
-; AVX1-NEXT:    vpsubd %xmm3, %xmm4, %xmm7
-; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvps %xmm2, %xmm6, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubd %xmm4, %xmm3, %xmm1
-; AVX1-NEXT:    vblendvps %xmm5, %xmm7, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm4
+; AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v8i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm2
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm2
-; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm3
-; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm2
+; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v8i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpltud %ymm1, %ymm0, %k1
-; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm2
-; AVX512-NEXT:    vpsubd %ymm0, %ymm1, %ymm2 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm2
+; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %cmp = icmp ult <8 x i32> %a, %b
   %ab = sub <8 x i32> %a, %b
@@ -594,41 +565,40 @@ define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 define <4 x i64> @abd_cmp_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX1-LABEL: abd_cmp_v4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
-; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vpxor %xmm2, %xmm4, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT:    vpxor %xmm2, %xmm6, %xmm2
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm5
-; AVX1-NEXT:    vpsubq %xmm6, %xmm4, %xmm7
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT:    vpsubq %xmm2, %xmm5, %xmm6
+; AVX1-NEXT:    vpsubq %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm6, %xmm2, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm4
 ; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm5, %xmm0
-; AVX1-NEXT:    vpsubq %xmm4, %xmm6, %xmm1
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvpd %xmm3, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: abd_cmp_v4i64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
-; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm3
 ; AVX2-NEXT:    vpsubq %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm3, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: abd_cmp_v4i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k1
-; AVX512-NEXT:    vpsubq %ymm0, %ymm1, %ymm2
-; AVX512-NEXT:    vpsubq %ymm1, %ymm0, %ymm2 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512-NEXT:    vpminuq %ymm1, %ymm0, %ymm2
+; AVX512-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %cmp = icmp uge <4 x i64> %a, %b
   %ab = sub <4 x i64> %a, %b

diff --git a/llvm/test/CodeGen/X86/abdu-vector-512.ll b/llvm/test/CodeGen/X86/abdu-vector-512.ll
index 19af5d3694f42..a855bea61e530 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-512.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-512.ll
@@ -267,10 +267,9 @@ define <8 x i64> @abd_minmax_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
 define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW-LABEL: abd_cmp_v64i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpcmpnleub %zmm1, %zmm0, %k1
-; AVX512BW-NEXT:    vpsubb %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: abd_cmp_v64i8:
@@ -278,17 +277,12 @@ define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
 ; AVX512DQ-NEXT:    vpminub %ymm2, %ymm3, %ymm4
-; AVX512DQ-NEXT:    vpcmpeqb %ymm4, %ymm3, %ymm4
-; AVX512DQ-NEXT:    vpminub %ymm1, %ymm0, %ymm5
-; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm0, %ymm5
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512DQ-NEXT:    vpsubb %ymm2, %ymm3, %ymm5
-; AVX512DQ-NEXT:    vpsubb %ymm1, %ymm0, %ymm6
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm5, %zmm6, %zmm5
-; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpsubb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT:    vpmaxub %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpminub %ymm1, %ymm0, %ymm3
+; AVX512DQ-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm0
 ; AVX512DQ-NEXT:    retq
   %cmp = icmp ugt <64 x i8> %a, %b
   %ab = sub <64 x i8> %a, %b
@@ -300,28 +294,22 @@ define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512BW-LABEL: abd_cmp_v32i16:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k1
-; AVX512BW-NEXT:    vpsubw %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: abd_cmp_v32i16:
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
-; AVX512DQ-NEXT:    vpmaxuw %ymm2, %ymm3, %ymm4
-; AVX512DQ-NEXT:    vpcmpeqw %ymm4, %ymm3, %ymm4
-; AVX512DQ-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm5
-; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm0, %ymm5
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512DQ-NEXT:    vpsubw %ymm2, %ymm3, %ymm5
-; AVX512DQ-NEXT:    vpsubw %ymm1, %ymm0, %ymm6
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm5, %zmm6, %zmm5
-; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpsubw %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT:    vpminuw %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT:    vpmaxuw %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpminuw %ymm1, %ymm0, %ymm3
+; AVX512DQ-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpternlogq $184, %zmm5, %zmm4, %zmm0
 ; AVX512DQ-NEXT:    retq
   %cmp = icmp uge <32 x i16> %a, %b
   %ab = sub <32 x i16> %a, %b
@@ -333,10 +321,9 @@ define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
 ; AVX512-LABEL: abd_cmp_v16i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpltud %zmm1, %zmm0, %k1
-; AVX512-NEXT:    vpsubd %zmm1, %zmm0, %zmm2
-; AVX512-NEXT:    vpsubd %zmm0, %zmm1, %zmm2 {%k1}
-; AVX512-NEXT:    vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %cmp = icmp ult <16 x i32> %a, %b
   %ab = sub <16 x i32> %a, %b
@@ -348,10 +335,9 @@ define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
 define <8 x i64> @abd_cmp_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
 ; AVX512-LABEL: abd_cmp_v8i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k1
-; AVX512-NEXT:    vpsubq %zmm0, %zmm1, %zmm2
-; AVX512-NEXT:    vpsubq %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512-NEXT:    vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %cmp = icmp uge <8 x i64> %a, %b
   %ab = sub <8 x i64> %a, %b
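For reference, here is a minimal hand-written IR sketch of the unsigned select-of-subs shape that the abd_cmp_* tests above exercise. The function name is made up and the trailing select/ret lines are an assumed completion (the hunk context above cuts off after the subs); this is illustrative only and not part of the commit's test files.

define <8 x i64> @abdu_select_sketch(<8 x i64> %a, <8 x i64> %b) nounwind {
  %cmp = icmp uge <8 x i64> %a, %b   ; unsigned compare feeding the select
  %ab  = sub <8 x i64> %a, %b        ; a - b
  %ba  = sub <8 x i64> %b, %a        ; b - a
  %sel = select <8 x i1> %cmp, <8 x i64> %ab, <8 x i64> %ba
  ret <8 x i64> %sel
}

When the combine fires and the unsigned absolute-difference node is legal for the type, functions of this shape are what now lower to the vpminu*/vpmaxu*/vpsub* sequences seen in the updated CHECK lines.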
