[llvm] da570ef - [DAG] Match select(icmp(x,y),sub(x,y),sub(y,x)) -> abd(x,y) patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 14 08:10:45 PDT 2023
Author: Simon Pilgrim
Date: 2023-03-14T15:10:30Z
New Revision: da570ef1b4f856603970ecb14299947fb6cd678a
URL: https://github.com/llvm/llvm-project/commit/da570ef1b4f856603970ecb14299947fb6cd678a
DIFF: https://github.com/llvm/llvm-project/commit/da570ef1b4f856603970ecb14299947fb6cd678a.diff
LOG: [DAG] Match select(icmp(x,y),sub(x,y),sub(y,x)) -> abd(x,y) patterns
Pulled out of PowerPC, and added ABDS support as well (hence the additional v4i32 PPC matches)
Differential Revision: https://reviews.llvm.org/D144789
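
For illustration, a minimal IR sketch of the kind of pattern this combine now recognises (it mirrors the existing abd_cmp_* tests; the function name below is made up, and the fold only fires when the target reports ISD::ABDU/ABDS as legal or custom for the vector type, e.g. AArch64 uabd/sabd):

define <8 x i16> @abd_example(<8 x i16> %a, <8 x i16> %b) {
  ; select(a > b, a - b, b - a) is the unsigned absolute difference |a - b|,
  ; which DAGCombiner can now fold to a single ISD::ABDU node.
  %cmp = icmp ugt <8 x i16> %a, %b
  %ab  = sub <8 x i16> %a, %b
  %ba  = sub <8 x i16> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
  ret <8 x i16> %sel
}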
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/test/CodeGen/AArch64/abd-combine.ll
llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
llvm/test/CodeGen/X86/abds-vector-128.ll
llvm/test/CodeGen/X86/abds-vector-256.ll
llvm/test/CodeGen/X86/abds-vector-512.ll
llvm/test/CodeGen/X86/abdu-vector-128.ll
llvm/test/CodeGen/X86/abdu-vector-256.ll
llvm/test/CodeGen/X86/abdu-vector-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4463873769bbe..7ab43845d5c75 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11840,6 +11840,38 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
+ // Match VSELECTs with absolute difference patterns.
+ // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
+ if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
+ N1.getOperand(0) == N2.getOperand(1) &&
+ N1.getOperand(1) == N2.getOperand(0)) {
+ bool IsSigned = isSignedIntSetCC(CC);
+ unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
+ if (hasOperation(ABDOpc, VT)) {
+ switch (CC) {
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ break;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
// Match VSELECTs into add with unsigned saturation.
if (hasOperation(ISD::UADDSAT, VT)) {
// Check if one of the arms of the VSELECT is vector with all bits set.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4011aaff50a0a..03a387570e3c6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1390,10 +1390,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
}
- if (Subtarget.hasP9Altivec()) {
- setTargetDAGCombine({ISD::VSELECT});
- }
-
setLibcallName(RTLIB::LOG_F128, "logf128");
setLibcallName(RTLIB::LOG2_F128, "log2f128");
setLibcallName(RTLIB::LOG10_F128, "log10f128");
@@ -16078,8 +16074,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
- case ISD::VSELECT:
- return combineVSelect(N, DCI);
}
return SDValue();
@@ -17705,69 +17699,6 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
return true;
}
-// For type v4i32/v8ii16/v16i8, transform
-// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (abdu a, b)
-// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (abdu a, b)
-// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (abdu a, b)
-// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (abdu a, b)
-// TODO: Move this to DAGCombiner?
-SDValue PPCTargetLowering::combineVSelect(SDNode *N,
- DAGCombinerInfo &DCI) const {
- assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
- assert(Subtarget.hasP9Altivec() &&
- "Only combine this when P9 altivec supported!");
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(N);
- SDValue Cond = N->getOperand(0);
- SDValue TrueOpnd = N->getOperand(1);
- SDValue FalseOpnd = N->getOperand(2);
- EVT VT = N->getOperand(1).getValueType();
-
- if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
- FalseOpnd.getOpcode() != ISD::SUB)
- return SDValue();
-
- // ABSD only available for type v4i32/v8i16/v16i8
- if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
- return SDValue();
-
- // At least to save one more dependent computation
- if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
- return SDValue();
-
- ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
- // Can only handle unsigned comparison here
- switch (CC) {
- default:
- return SDValue();
- case ISD::SETUGT:
- case ISD::SETUGE:
- break;
- case ISD::SETULT:
- case ISD::SETULE:
- std::swap(TrueOpnd, FalseOpnd);
- break;
- }
-
- SDValue CmpOpnd1 = Cond.getOperand(0);
- SDValue CmpOpnd2 = Cond.getOperand(1);
-
- // SETCC CmpOpnd1 CmpOpnd2 cond
- // TrueOpnd = CmpOpnd1 - CmpOpnd2
- // FalseOpnd = CmpOpnd2 - CmpOpnd1
- if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
- TrueOpnd.getOperand(1) == CmpOpnd2 &&
- FalseOpnd.getOperand(0) == CmpOpnd2 &&
- FalseOpnd.getOperand(1) == CmpOpnd1) {
- return DAG.getNode(ISD::ABDU, dl, N->getOperand(1).getValueType(), CmpOpnd1,
- CmpOpnd2, DAG.getTargetConstant(0, dl, MVT::i32));
- }
-
- return SDValue();
-}
-
/// getAddrModeForFlags - Based on the set of address flags, select the most
/// optimal instruction format to match by.
PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 21fad5a12a8fe..9706fddb0fc0b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1416,7 +1416,6 @@ namespace llvm {
SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG) const;
SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index dad3fd08a7b9a..a7e0c26fd7a15 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -107,10 +107,7 @@ define <8 x i16> @abdu_undef(<8 x i16> %src1) {
define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ugt:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmhi v2.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%3 = icmp ugt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@@ -122,10 +119,7 @@ define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_uge:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmhs v2.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%3 = icmp uge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@@ -137,10 +131,7 @@ define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ult:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmhi v2.8h, v1.8h, v0.8h
-; CHECK-NEXT: sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%3 = icmp ult <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@@ -152,10 +143,7 @@ define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ule:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmhs v2.8h, v1.8h, v0.8h
-; CHECK-NEXT: sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%3 = icmp ule <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@@ -167,10 +155,7 @@ define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sgt:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmgt v2.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%3 = icmp sgt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@@ -182,10 +167,7 @@ define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sge:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmge v2.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%3 = icmp sge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@@ -197,10 +179,7 @@ define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_slt:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmgt v2.8h, v1.8h, v0.8h
-; CHECK-NEXT: sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%3 = icmp slt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@@ -212,10 +191,7 @@ define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sle:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmge v2.8h, v1.8h, v0.8h
-; CHECK-NEXT: sub v3.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%3 = icmp sle <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 9e9271ed7c5d7..342a9044b9bcc 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -1834,13 +1834,20 @@ define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) {
; Tests for ABDS icmp + sub + select sequence
define <4 x i32> @absd_int32_sgt(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_sgt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpgtsw v4, v2, v3
-; CHECK-NEXT: vsubuwm v5, v2, v3
-; CHECK-NEXT: vsubuwm v2, v3, v2
-; CHECK-NEXT: xxsel v2, v2, v5, v4
-; CHECK-NEXT: blr
+; CHECK-PWR9-LABEL: absd_int32_sgt:
+; CHECK-PWR9: # %bb.0:
+; CHECK-PWR9-NEXT: xvnegsp v3, v3
+; CHECK-PWR9-NEXT: xvnegsp v2, v2
+; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT: blr
+;
+; CHECK-PWR78-LABEL: absd_int32_sgt:
+; CHECK-PWR78: # %bb.0:
+; CHECK-PWR78-NEXT: vcmpgtsw v4, v2, v3
+; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
+; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
+; CHECK-PWR78-NEXT: blr
%3 = icmp sgt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
@@ -1849,14 +1856,21 @@ define <4 x i32> @absd_int32_sgt(<4 x i32>, <4 x i32>) {
}
define <4 x i32> @absd_int32_sge(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_sge:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpgtsw v4, v3, v2
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: vsubuwm v4, v2, v3
-; CHECK-NEXT: vsubuwm v2, v3, v2
-; CHECK-NEXT: xxsel v2, v2, v4, vs0
-; CHECK-NEXT: blr
+; CHECK-PWR9-LABEL: absd_int32_sge:
+; CHECK-PWR9: # %bb.0:
+; CHECK-PWR9-NEXT: xvnegsp v3, v3
+; CHECK-PWR9-NEXT: xvnegsp v2, v2
+; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT: blr
+;
+; CHECK-PWR78-LABEL: absd_int32_sge:
+; CHECK-PWR78: # %bb.0:
+; CHECK-PWR78-NEXT: vcmpgtsw v4, v3, v2
+; CHECK-PWR78-NEXT: xxlnor vs0, v4, v4
+; CHECK-PWR78-NEXT: vsubuwm v4, v2, v3
+; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT: xxsel v2, v2, v4, vs0
+; CHECK-PWR78-NEXT: blr
%3 = icmp sge <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
@@ -1865,13 +1879,20 @@ define <4 x i32> @absd_int32_sge(<4 x i32>, <4 x i32>) {
}
define <4 x i32> @absd_int32_slt(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_slt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpgtsw v4, v3, v2
-; CHECK-NEXT: vsubuwm v5, v2, v3
-; CHECK-NEXT: vsubuwm v2, v3, v2
-; CHECK-NEXT: xxsel v2, v5, v2, v4
-; CHECK-NEXT: blr
+; CHECK-PWR9-LABEL: absd_int32_slt:
+; CHECK-PWR9: # %bb.0:
+; CHECK-PWR9-NEXT: xvnegsp v3, v3
+; CHECK-PWR9-NEXT: xvnegsp v2, v2
+; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT: blr
+;
+; CHECK-PWR78-LABEL: absd_int32_slt:
+; CHECK-PWR78: # %bb.0:
+; CHECK-PWR78-NEXT: vcmpgtsw v4, v3, v2
+; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
+; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
+; CHECK-PWR78-NEXT: blr
%3 = icmp slt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
@@ -1880,14 +1901,21 @@ define <4 x i32> @absd_int32_slt(<4 x i32>, <4 x i32>) {
}
define <4 x i32> @absd_int32_sle(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_sle:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpgtsw v4, v2, v3
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: vsubuwm v4, v2, v3
-; CHECK-NEXT: vsubuwm v2, v3, v2
-; CHECK-NEXT: xxsel v2, v4, v2, vs0
-; CHECK-NEXT: blr
+; CHECK-PWR9-LABEL: absd_int32_sle:
+; CHECK-PWR9: # %bb.0:
+; CHECK-PWR9-NEXT: xvnegsp v3, v3
+; CHECK-PWR9-NEXT: xvnegsp v2, v2
+; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT: blr
+;
+; CHECK-PWR78-LABEL: absd_int32_sle:
+; CHECK-PWR78: # %bb.0:
+; CHECK-PWR78-NEXT: vcmpgtsw v4, v2, v3
+; CHECK-PWR78-NEXT: xxlnor vs0, v4, v4
+; CHECK-PWR78-NEXT: vsubuwm v4, v2, v3
+; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT: xxsel v2, v4, v2, vs0
+; CHECK-PWR78-NEXT: blr
%3 = icmp sle <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll
index 6533e2cc20b3d..a48781c6ebf8e 100644
--- a/llvm/test/CodeGen/X86/abds-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-128.ll
@@ -799,38 +799,17 @@ define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE42-LABEL: abd_cmp_v16i8:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: pcmpgtb %xmm1, %xmm2
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: psubb %xmm1, %xmm3
-; SSE42-NEXT: psubb %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: pminsb %xmm1, %xmm2
+; SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; SSE42-NEXT: psubb %xmm2, %xmm0
; SSE42-NEXT: retq
;
-; AVX1-LABEL: abd_cmp_v16i8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: abd_cmp_v16i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: abd_cmp_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpsubb %xmm0, %xmm1, %xmm3
-; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: abd_cmp_v16i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
%cmp = icmp sgt <16 x i8> %a, %b
%ab = sub <16 x i8> %a, %b
%ba = sub <16 x i8> %b, %a
@@ -839,54 +818,20 @@ define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
}
define <8 x i16> @abd_cmp_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
-; SSE2-LABEL: abd_cmp_v8i16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: pcmpgtw %xmm0, %xmm2
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: psubw %xmm1, %xmm3
-; SSE2-NEXT: psubw %xmm0, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pandn %xmm3, %xmm2
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE42-LABEL: abd_cmp_v8i16:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm1, %xmm2
-; SSE42-NEXT: pcmpgtw %xmm0, %xmm2
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: psubw %xmm1, %xmm3
-; SSE42-NEXT: psubw %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm1, %xmm3
-; SSE42-NEXT: movdqa %xmm3, %xmm0
-; SSE42-NEXT: retq
-;
-; AVX1-LABEL: abd_cmp_v8i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: abd_cmp_v8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm2
-; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: retq
+; SSE-LABEL: abd_cmp_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pminsw %xmm1, %xmm2
+; SSE-NEXT: pmaxsw %xmm1, %xmm0
+; SSE-NEXT: psubw %xmm2, %xmm0
+; SSE-NEXT: retq
;
-; AVX512-LABEL: abd_cmp_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpsubw %xmm0, %xmm1, %xmm3
-; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: abd_cmp_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubw %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
%cmp = icmp sge <8 x i16> %a, %b
%ab = sub <8 x i16> %a, %b
%ba = sub <8 x i16> %b, %a
@@ -910,39 +855,18 @@ define <4 x i32> @abd_cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
;
; SSE42-LABEL: abd_cmp_v4i32:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm1, %xmm2
-; SSE42-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: psubd %xmm1, %xmm3
-; SSE42-NEXT: psubd %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: blendvps %xmm0, %xmm1, %xmm3
-; SSE42-NEXT: movaps %xmm3, %xmm0
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pminsd %xmm1, %xmm2
+; SSE42-NEXT: pmaxsd %xmm1, %xmm0
+; SSE42-NEXT: psubd %xmm2, %xmm0
; SSE42-NEXT: retq
;
-; AVX1-LABEL: abd_cmp_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: abd_cmp_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm2
-; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: abd_cmp_v4i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpsubd %xmm0, %xmm1, %xmm2 {%k1}
-; AVX512-NEXT: vmovdqa %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: abd_cmp_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
%cmp = icmp slt <4 x i32> %a, %b
%ab = sub <4 x i32> %a, %b
%ba = sub <4 x i32> %b, %a
@@ -976,38 +900,37 @@ define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; SSE42-LABEL: abd_cmp_v2i64:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm1, %xmm2
-; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm2
; SSE42-NEXT: movdqa %xmm0, %xmm3
; SSE42-NEXT: psubq %xmm1, %xmm3
; SSE42-NEXT: psubq %xmm0, %xmm1
; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
-; SSE42-NEXT: movapd %xmm3, %xmm0
+; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: abd_cmp_v2i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v2i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpnltq %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpsubq %xmm0, %xmm1, %xmm2
-; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm2 {%k1}
-; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT: retq
%cmp = icmp sge <2 x i64> %a, %b
%ab = sub <2 x i64> %a, %b
@@ -1174,44 +1097,47 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: psubq %xmm1, %xmm2
-; SSE42-NEXT: movdqa %xmm1, %xmm3
-; SSE42-NEXT: psubq %xmm0, %xmm3
-; SSE42-NEXT: pcmpgtq %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm1, %xmm0
-; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: movdqa %xmm2, %xmm3
+; SSE42-NEXT: psubq %xmm1, %xmm3
+; SSE42-NEXT: movdqa %xmm1, %xmm4
+; SSE42-NEXT: psubq %xmm2, %xmm4
+; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm4
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm1
; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
; SSE42-NEXT: pxor %xmm1, %xmm0
-; SSE42-NEXT: paddq %xmm2, %xmm0
+; SSE42-NEXT: paddq %xmm4, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm4
+; AVX1-NEXT: vblendvpd %xmm2, %xmm3, %xmm4, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm3
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm4
+; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm4, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpnltq %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpsubq %xmm0, %xmm1, %xmm2
-; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; AVX512-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpaddq %xmm2, %xmm0, %xmm0
@@ -1226,60 +1152,25 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
}
define <8 x i16> @abd_cmp_v8i16_multiuse_sub(<8 x i16> %a, <8 x i16> %b) nounwind {
-; SSE2-LABEL: abd_cmp_v8i16_multiuse_sub:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: pcmpgtw %xmm1, %xmm2
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: psubw %xmm1, %xmm3
-; SSE2-NEXT: psubw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pandn %xmm1, %xmm2
-; SSE2-NEXT: por %xmm0, %xmm2
-; SSE2-NEXT: paddw %xmm3, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE42-LABEL: abd_cmp_v8i16_multiuse_sub:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: pcmpgtw %xmm1, %xmm2
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: psubw %xmm1, %xmm3
-; SSE42-NEXT: psubw %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT: paddw %xmm1, %xmm3
-; SSE42-NEXT: movdqa %xmm3, %xmm0
-; SSE42-NEXT: retq
-;
-; AVX1-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpaddw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: vpaddw %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: retq
+; SSE-LABEL: abd_cmp_v8i16_multiuse_sub:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psubw %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: pminsw %xmm1, %xmm3
+; SSE-NEXT: pmaxsw %xmm1, %xmm0
+; SSE-NEXT: psubw %xmm3, %xmm0
+; SSE-NEXT: paddw %xmm2, %xmm0
+; SSE-NEXT: retq
;
-; AVX512-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpsubw %xmm0, %xmm1, %xmm3
-; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT: vpaddw %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubw %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; AVX-NEXT: retq
%cmp = icmp sgt <8 x i16> %a, %b
%ab = sub <8 x i16> %a, %b
%ba = sub <8 x i16> %b, %a
diff --git a/llvm/test/CodeGen/X86/abds-vector-256.ll b/llvm/test/CodeGen/X86/abds-vector-256.ll
index b9bd875cee76e..78190d2cb7d8b 100644
--- a/llvm/test/CodeGen/X86/abds-vector-256.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-256.ll
@@ -441,34 +441,27 @@ define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
-; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm5
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm6
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vandnps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT: vandps %ymm4, %ymm5, %ymm1
-; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v32i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v32i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm2
-; AVX512-NEXT: vpsubb %ymm0, %ymm1, %ymm3
-; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
+; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp sgt <32 x i8> %a, %b
%ab = sub <32 x i8> %a, %b
@@ -480,36 +473,29 @@ define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_cmp_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm5
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
-; AVX1-NEXT: vpsubw %xmm3, %xmm2, %xmm5
-; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm6
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsubw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT: vandnps %ymm5, %ymm4, %ymm1
-; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v16i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm2
-; AVX512-NEXT: vpsubw %ymm0, %ymm1, %ymm3
-; AVX512-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp sge <16 x i16> %a, %b
%ab = sub <16 x i16> %a, %b
@@ -521,33 +507,29 @@ define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_cmp_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm5
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm6
-; AVX1-NEXT: vpsubd %xmm4, %xmm3, %xmm7
-; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm6, %xmm0
-; AVX1-NEXT: vpsubd %xmm3, %xmm4, %xmm1
-; AVX1-NEXT: vblendvps %xmm5, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm2
-; AVX512-NEXT: vpsubd %ymm0, %ymm1, %ymm2 {%k1}
-; AVX512-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp slt <8 x i32> %a, %b
%ab = sub <8 x i32> %a, %b
@@ -559,33 +541,32 @@ define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <4 x i64> @abd_cmp_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_cmp_v4i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5
-; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm6
-; AVX1-NEXT: vpsubq %xmm4, %xmm3, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm5
+; AVX1-NEXT: vpsubq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm4, %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm6, %xmm0
-; AVX1-NEXT: vpsubq %xmm3, %xmm4, %xmm1
-; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %xmm3, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm3
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vblendvpd %ymm2, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpnltq %ymm1, %ymm0, %k1
-; AVX512-NEXT: vpsubq %ymm0, %ymm1, %ymm2
-; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm2 {%k1}
-; AVX512-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp sge <4 x i64> %a, %b
%ab = sub <4 x i64> %a, %b
diff --git a/llvm/test/CodeGen/X86/abds-vector-512.ll b/llvm/test/CodeGen/X86/abds-vector-512.ll
index 65daad55c5cd8..359b962a152b2 100644
--- a/llvm/test/CodeGen/X86/abds-vector-512.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-512.ll
@@ -267,26 +267,22 @@ define <8 x i64> @abd_minmax_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_cmp_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
-; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: abd_cmp_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm5
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512DQ-NEXT: vpsubb %ymm2, %ymm3, %ymm5
-; AVX512DQ-NEXT: vpsubb %ymm1, %ymm0, %ymm6
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
-; AVX512DQ-NEXT: vpsubb %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpsubb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vpminsb %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpmaxsb %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpsubb %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpminsb %ymm1, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpternlogq $184, %zmm5, %zmm4, %zmm0
; AVX512DQ-NEXT: retq
%cmp = icmp sgt <64 x i8> %a, %b
%ab = sub <64 x i8> %a, %b
@@ -298,26 +294,22 @@ define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_cmp_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k1
-; AVX512BW-NEXT: vpsubw %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: abd_cmp_v32i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512DQ-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm5
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512DQ-NEXT: vpsubw %ymm3, %ymm2, %ymm5
-; AVX512DQ-NEXT: vpsubw %ymm1, %ymm0, %ymm6
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
-; AVX512DQ-NEXT: vpsubw %ymm2, %ymm3, %ymm2
-; AVX512DQ-NEXT: vpsubw %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512DQ-NEXT: vpminsw %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpmaxsw %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpsubw %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpminsw %ymm1, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0
; AVX512DQ-NEXT: retq
%cmp = icmp sge <32 x i16> %a, %b
%ab = sub <32 x i16> %a, %b
@@ -329,10 +321,9 @@ define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_cmp_v16i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm2
-; AVX512-NEXT: vpsubd %zmm0, %zmm1, %zmm2 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT: retq
%cmp = icmp slt <16 x i32> %a, %b
%ab = sub <16 x i32> %a, %b
@@ -344,10 +335,9 @@ define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
define <8 x i64> @abd_cmp_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_cmp_v8i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpnltq %zmm1, %zmm0, %k1
-; AVX512-NEXT: vpsubq %zmm0, %zmm1, %zmm2
-; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT: retq
%cmp = icmp sge <8 x i64> %a, %b
%ab = sub <8 x i64> %a, %b
diff --git a/llvm/test/CodeGen/X86/abdu-vector-128.ll b/llvm/test/CodeGen/X86/abdu-vector-128.ll
index 910fe49d33a0e..88496032aa307 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-128.ll
@@ -641,59 +641,20 @@ define <2 x i64> @abd_minmax_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
-; SSE2-LABEL: abd_cmp_v16i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: pminub %xmm1, %xmm2
-; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: psubb %xmm1, %xmm3
-; SSE2-NEXT: psubb %xmm0, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pandn %xmm3, %xmm2
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE42-LABEL: abd_cmp_v16i8:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: pminub %xmm1, %xmm2
-; SSE42-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: psubb %xmm1, %xmm3
-; SSE42-NEXT: psubb %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm1, %xmm3
-; SSE42-NEXT: movdqa %xmm3, %xmm0
-; SSE42-NEXT: retq
-;
-; AVX1-LABEL: abd_cmp_v16i8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: abd_cmp_v16i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: retq
+; SSE-LABEL: abd_cmp_v16i8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pminub %xmm1, %xmm2
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: psubb %xmm2, %xmm0
+; SSE-NEXT: retq
;
-; AVX512-LABEL: abd_cmp_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpsubb %xmm0, %xmm1, %xmm3
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm1
-; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: abd_cmp_v16i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
%cmp = icmp ugt <16 x i8> %a, %b
%ab = sub <16 x i8> %a, %b
%ba = sub <16 x i8> %b, %a
@@ -720,42 +681,17 @@ define <8 x i16> @abd_cmp_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE42-LABEL: abd_cmp_v8i16:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: pmaxuw %xmm1, %xmm2
-; SSE42-NEXT: pcmpeqw %xmm0, %xmm2
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: psubw %xmm1, %xmm3
-; SSE42-NEXT: psubw %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: pminuw %xmm1, %xmm2
+; SSE42-NEXT: pmaxuw %xmm1, %xmm0
+; SSE42-NEXT: psubw %xmm2, %xmm0
; SSE42-NEXT: retq
;
-; AVX1-LABEL: abd_cmp_v8i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: abd_cmp_v8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: abd_cmp_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpsubw %xmm0, %xmm1, %xmm3
-; AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
-; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: abd_cmp_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubw %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
%cmp = icmp uge <8 x i16> %a, %b
%ab = sub <8 x i16> %a, %b
%ba = sub <8 x i16> %b, %a
@@ -783,41 +719,17 @@ define <4 x i32> @abd_cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE42-LABEL: abd_cmp_v4i32:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: pmaxud %xmm1, %xmm2
-; SSE42-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: psubd %xmm1, %xmm3
-; SSE42-NEXT: psubd %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: blendvps %xmm0, %xmm3, %xmm1
-; SSE42-NEXT: movaps %xmm1, %xmm0
+; SSE42-NEXT: pminud %xmm1, %xmm2
+; SSE42-NEXT: pmaxud %xmm1, %xmm0
+; SSE42-NEXT: psubd %xmm2, %xmm0
; SSE42-NEXT: retq
;
-; AVX1-LABEL: abd_cmp_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: abd_cmp_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: abd_cmp_v4i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpltud %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpsubd %xmm0, %xmm1, %xmm2 {%k1}
-; AVX512-NEXT: vmovdqa %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: abd_cmp_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
%cmp = icmp ult <4 x i32> %a, %b
%ab = sub <4 x i32> %a, %b
%ba = sub <4 x i32> %b, %a
@@ -852,46 +764,45 @@ define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE42-LABEL: abd_cmp_v2i64:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: movdqa %xmm1, %xmm3
; SSE42-NEXT: pxor %xmm2, %xmm3
-; SSE42-NEXT: pxor %xmm1, %xmm2
+; SSE42-NEXT: pxor %xmm0, %xmm2
; SSE42-NEXT: pcmpgtq %xmm3, %xmm2
; SSE42-NEXT: movdqa %xmm0, %xmm3
; SSE42-NEXT: psubq %xmm1, %xmm3
; SSE42-NEXT: psubq %xmm0, %xmm1
; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
-; SSE42-NEXT: movapd %xmm3, %xmm0
+; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: abd_cmp_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpnltuq %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpsubq %xmm0, %xmm1, %xmm2
-; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm2 {%k1}
-; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT: retq
%cmp = icmp uge <2 x i64> %a, %b
%ab = sub <2 x i64> %a, %b
@@ -932,50 +843,52 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
;
; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: psubq %xmm1, %xmm2
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: movdqa %xmm1, %xmm3
-; SSE42-NEXT: psubq %xmm0, %xmm3
-; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; SSE42-NEXT: pxor %xmm4, %xmm1
-; SSE42-NEXT: pxor %xmm4, %xmm0
-; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
-; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
-; SSE42-NEXT: paddq %xmm3, %xmm0
+; SSE42-NEXT: pxor %xmm2, %xmm3
+; SSE42-NEXT: pxor %xmm0, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubq %xmm1, %xmm3
+; SSE42-NEXT: psubq %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: paddq %xmm1, %xmm2
+; SSE42-NEXT: movdqa %xmm2, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm3
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm1
-; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm3
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm1
-; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpsubq %xmm0, %xmm1, %xmm2
-; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm2 {%k1}
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vpaddq %xmm0, %xmm2, %xmm2 {%k1}
-; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
%cmp = icmp ugt <2 x i64> %a, %b
%ab = sub <2 x i64> %a, %b
@@ -1007,46 +920,22 @@ define <8 x i16> @abd_cmp_v8i16_multiuse_sub(<8 x i16> %a, <8 x i16> %b) nounwin
; SSE42-LABEL: abd_cmp_v8i16_multiuse_sub:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: pmaxuw %xmm1, %xmm2
-; SSE42-NEXT: pcmpeqw %xmm0, %xmm2
+; SSE42-NEXT: psubw %xmm1, %xmm2
; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: psubw %xmm1, %xmm3
-; SSE42-NEXT: psubw %xmm0, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT: paddw %xmm1, %xmm3
-; SSE42-NEXT: movdqa %xmm3, %xmm0
+; SSE42-NEXT: pminuw %xmm1, %xmm3
+; SSE42-NEXT: pmaxuw %xmm1, %xmm0
+; SSE42-NEXT: psubw %xmm3, %xmm0
+; SSE42-NEXT: paddw %xmm2, %xmm0
; SSE42-NEXT: retq
;
-; AVX1-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpaddw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: vpaddw %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: abd_cmp_v8i16_multiuse_sub:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpsubw %xmm0, %xmm1, %xmm3
-; AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
-; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT: vpaddw %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubw %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; AVX-NEXT: retq
%cmp = icmp uge <8 x i16> %a, %b
%ab = sub <8 x i16> %a, %b
%ba = sub <8 x i16> %b, %a
diff --git a/llvm/test/CodeGen/X86/abdu-vector-256.ll b/llvm/test/CodeGen/X86/abdu-vector-256.ll
index be6c7442bf0ac..3957133574ce5 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-256.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-256.ll
@@ -466,37 +466,26 @@ define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm5
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
-; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm5
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm6
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT: vandnps %ymm5, %ymm4, %ymm1
-; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v32i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm2
-; AVX512-NEXT: vpsubb %ymm0, %ymm1, %ymm3
-; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm1
-; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp ugt <32 x i8> %a, %b
%ab = sub <32 x i8> %a, %b
@@ -510,38 +499,27 @@ define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm5
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
-; AVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm5
-; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm6
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT: vpsubw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsubw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vandnps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT: vandps %ymm4, %ymm5, %ymm1
-; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v16i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm2
-; AVX512-NEXT: vpsubw %ymm0, %ymm1, %ymm3
-; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
-; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
+; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp uge <16 x i16> %a, %b
%ab = sub <16 x i16> %a, %b
@@ -553,36 +531,29 @@ define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_cmp_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpmaxud %xmm3, %xmm4, %xmm5
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm5
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm6
-; AVX1-NEXT: vpsubd %xmm3, %xmm4, %xmm7
-; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvps %xmm2, %xmm6, %xmm0, %xmm0
-; AVX1-NEXT: vpsubd %xmm4, %xmm3, %xmm1
-; AVX1-NEXT: vblendvps %xmm5, %xmm7, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpltud %ymm1, %ymm0, %k1
-; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm2
-; AVX512-NEXT: vpsubd %ymm0, %ymm1, %ymm2 {%k1}
-; AVX512-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp ult <8 x i32> %a, %b
%ab = sub <8 x i32> %a, %b
@@ -594,41 +565,40 @@ define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <4 x i64> @abd_cmp_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_cmp_v4i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm5
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT: vpxor %xmm2, %xmm6, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpsubq %xmm6, %xmm4, %xmm7
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm6
+; AVX1-NEXT: vpsubq %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm4, %xmm6, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm5, %xmm0
-; AVX1-NEXT: vpsubq %xmm4, %xmm6, %xmm1
-; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %xmm3, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
-; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm3
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vblendvpd %ymm2, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpnltuq %ymm1, %ymm0, %k1
-; AVX512-NEXT: vpsubq %ymm0, %ymm1, %ymm2
-; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm2 {%k1}
-; AVX512-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp uge <4 x i64> %a, %b
%ab = sub <4 x i64> %a, %b
diff --git a/llvm/test/CodeGen/X86/abdu-vector-512.ll b/llvm/test/CodeGen/X86/abdu-vector-512.ll
index 19af5d3694f42..a855bea61e530 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-512.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-512.ll
@@ -267,10 +267,9 @@ define <8 x i64> @abd_minmax_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_cmp_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k1
-; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: abd_cmp_v64i8:
@@ -278,17 +277,12 @@ define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT: vpminub %ymm2, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpminub %ymm1, %ymm0, %ymm5
-; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm5
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512DQ-NEXT: vpsubb %ymm2, %ymm3, %ymm5
-; AVX512DQ-NEXT: vpsubb %ymm1, %ymm0, %ymm6
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
-; AVX512DQ-NEXT: vpsubb %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpsubb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vpmaxub %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpsubb %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpminub %ymm1, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0
; AVX512DQ-NEXT: retq
%cmp = icmp ugt <64 x i8> %a, %b
%ab = sub <64 x i8> %a, %b
@@ -300,28 +294,22 @@ define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_cmp_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k1
-; AVX512BW-NEXT: vpsubw %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: abd_cmp_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512DQ-NEXT: vpmaxuw %ymm2, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpcmpeqw %ymm4, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpmaxuw %ymm1, %ymm0, %ymm5
-; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm5
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512DQ-NEXT: vpsubw %ymm2, %ymm3, %ymm5
-; AVX512DQ-NEXT: vpsubw %ymm1, %ymm0, %ymm6
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
-; AVX512DQ-NEXT: vpsubw %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpsubw %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vpminuw %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpmaxuw %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpsubw %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpminuw %ymm1, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpternlogq $184, %zmm5, %zmm4, %zmm0
; AVX512DQ-NEXT: retq
%cmp = icmp uge <32 x i16> %a, %b
%ab = sub <32 x i16> %a, %b
@@ -333,10 +321,9 @@ define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_cmp_v16i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpltud %zmm1, %zmm0, %k1
-; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm2
-; AVX512-NEXT: vpsubd %zmm0, %zmm1, %zmm2 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT: retq
%cmp = icmp ult <16 x i32> %a, %b
%ab = sub <16 x i32> %a, %b
@@ -348,10 +335,9 @@ define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
define <8 x i64> @abd_cmp_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_cmp_v8i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpnltuq %zmm1, %zmm0, %k1
-; AVX512-NEXT: vpsubq %zmm0, %zmm1, %zmm2
-; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT: retq
%cmp = icmp uge <8 x i64> %a, %b
%ab = sub <8 x i64> %a, %b
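
For context, a minimal standalone sketch of the IR shape the abd_cmp tests above exercise for the unsigned case is shown below; the function name and the %ba/%sel/ret lines are assumptions, since the diff context cuts off after %ab. With this change, an AVX512 target lowers it to the min/max/sub sequence visible in the v8i64 check lines (vpminuq, vpmaxuq, vpsubq) rather than a compare plus masked subtract.

define <8 x i64> @abd_cmp_v8i64_sketch(<8 x i64> %a, <8 x i64> %b) nounwind {
  ; When a uge b the result is (a - b), otherwise (b - a):
  ; the unsigned absolute difference of the two vectors.
  %cmp = icmp uge <8 x i64> %a, %b
  %ab = sub <8 x i64> %a, %b
  %ba = sub <8 x i64> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i64> %ab, <8 x i64> %ba
  ret <8 x i64> %sel
}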