[llvm] 561d9cd - [X86] Add vector test coverage for select(icmp(x,y),sub(x,y),sub(y,x)) -> abd(x,y) patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 26 09:08:13 PST 2023
Author: Simon Pilgrim
Date: 2023-02-26T17:05:46Z
New Revision: 561d9cd6f457827bb81fcd036343ad14ec70f89c
URL: https://github.com/llvm/llvm-project/commit/561d9cd6f457827bb81fcd036343ad14ec70f89c
DIFF: https://github.com/llvm/llvm-project/commit/561d9cd6f457827bb81fcd036343ad14ec70f89c.diff
LOG: [X86] Add vector test coverage for select(icmp(x,y),sub(x,y),sub(y,x)) -> abd(x,y) patterns
Added:
Modified:
llvm/test/CodeGen/X86/abds-vector-128.ll
llvm/test/CodeGen/X86/abds-vector-256.ll
llvm/test/CodeGen/X86/abds-vector-512.ll
llvm/test/CodeGen/X86/abdu-vector-128.ll
llvm/test/CodeGen/X86/abdu-vector-256.ll
llvm/test/CodeGen/X86/abdu-vector-512.ll
Removed:
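
For reference, the select(icmp(x,y),sub(x,y),sub(y,x)) pattern under test computes the absolute difference |x - y| and is equivalent to sub(max(x,y),min(x,y)), the form exercised by the existing abd_minmax_* tests in these files. Below is a minimal scalar sketch of both forms in LLVM IR; the function names are illustrative, and the min/max variant assumes the generic llvm.smax/llvm.smin intrinsics (the new vector tests use the select(icmp) form directly, with signed predicates in the abds files and unsigned predicates in the abdu files):

; select(icmp) form - the shape exercised by the new abd_cmp_* vector tests
define i32 @abd_cmp_i32_sketch(i32 %a, i32 %b) {
  %cmp = icmp sgt i32 %a, %b
  %ab = sub i32 %a, %b
  %ba = sub i32 %b, %a
  %sel = select i1 %cmp, i32 %ab, i32 %ba
  ret i32 %sel
}

; equivalent min/max form, matching the shape of the abd_minmax_* coverage
define i32 @abd_minmax_i32_sketch(i32 %a, i32 %b) {
  %max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %sub = sub i32 %max, %min
  ret i32 %sub
}

declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
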
################################################################################
diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll
index 1b493fe8c04f..92a91d09fda6 100644
--- a/llvm/test/CodeGen/X86/abds-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-128.ll
@@ -785,6 +785,244 @@ define <2 x i64> @abd_minmax_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
ret <2 x i64> %sub
}
+;
+; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b)
+;
+
+define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v16i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubb %xmm1, %xmm3
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v16i8:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubb %xmm1, %xmm3
+; SSE42-NEXT: psubb %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v16i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubb %xmm0, %xmm1, %xmm3
+; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp sgt <16 x i8> %a, %b
+ %ab = sub <16 x i8> %a, %b
+ %ba = sub <16 x i8> %b, %a
+ %sel = select <16 x i1> %cmp, <16 x i8> %ab, <16 x i8> %ba
+ ret <16 x i8> %sel
+}
+
+define <8 x i16> @abd_cmp_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v8i16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtw %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubw %xmm1, %xmm3
+; SSE2-NEXT: psubw %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v8i16:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm1, %xmm2
+; SSE42-NEXT: pcmpgtw %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubw %xmm1, %xmm3
+; SSE42-NEXT: psubw %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm1, %xmm3
+; SSE42-NEXT: movdqa %xmm3, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v8i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v8i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubw %xmm0, %xmm1, %xmm3
+; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp sge <8 x i16> %a, %b
+ %ab = sub <8 x i16> %a, %b
+ %ba = sub <8 x i16> %b, %a
+ %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
+ ret <8 x i16> %sel
+}
+
+define <4 x i32> @abd_cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v4i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubd %xmm1, %xmm3
+; SSE2-NEXT: psubd %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v4i32:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm1, %xmm2
+; SSE42-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubd %xmm1, %xmm3
+; SSE42-NEXT: psubd %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: blendvps %xmm0, %xmm1, %xmm3
+; SSE42-NEXT: movaps %xmm3, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
+; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubd %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp slt <4 x i32> %a, %b
+ %ab = sub <4 x i32> %a, %b
+ %ba = sub <4 x i32> %b, %a
+ %sel = select <4 x i1> %cmp, <4 x i32> %ba, <4 x i32> %ab
+ ret <4 x i32> %sel
+}
+
+define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v2i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubq %xmm1, %xmm3
+; SSE2-NEXT: psubq %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v2i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm1, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubq %xmm1, %xmm3
+; SSE42-NEXT: psubq %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnltq %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubq %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp sge <2 x i64> %a, %b
+ %ab = sub <2 x i64> %a, %b
+ %ba = sub <2 x i64> %b, %a
+ %sel = select <2 x i1> %cmp, <2 x i64> %ba, <2 x i64> %ab
+ ret <2 x i64> %sel
+}
+
;
; abs(sub_nsw(x, y)) -> abds(a,b)
;
diff --git a/llvm/test/CodeGen/X86/abds-vector-256.ll b/llvm/test/CodeGen/X86/abds-vector-256.ll
index e4f4dc4d738e..880ce6434b67 100644
--- a/llvm/test/CodeGen/X86/abds-vector-256.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-256.ll
@@ -432,6 +432,168 @@ define <4 x i64> @abd_minmax_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
ret <4 x i64> %sub
}
+;
+; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b)
+;
+
+define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
+; AVX1-LABEL: abd_cmp_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm5
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
+; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandnps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT: vandps %ymm4, %ymm5, %ymm1
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm3
+; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsubb %ymm0, %ymm1, %ymm3
+; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
+; AVX512-NEXT: retq
+ %cmp = icmp sgt <32 x i8> %a, %b
+ %ab = sub <32 x i8> %a, %b
+ %ba = sub <32 x i8> %b, %a
+ %sel = select <32 x i1> %cmp, <32 x i8> %ab, <32 x i8> %ba
+ ret <32 x i8> %sel
+}
+
+define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
+; AVX1-LABEL: abd_cmp_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vpsubw %xmm3, %xmm2, %xmm5
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
+; AVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT: vandnps %ymm5, %ymm4, %ymm1
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm3
+; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsubw %ymm0, %ymm1, %ymm3
+; AVX512-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512-NEXT: retq
+ %cmp = icmp sge <16 x i16> %a, %b
+ %ab = sub <16 x i16> %a, %b
+ %ba = sub <16 x i16> %b, %a
+ %sel = select <16 x i1> %cmp, <16 x i16> %ab, <16 x i16> %ba
+ ret <16 x i16> %sel
+}
+
+define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
+; AVX1-LABEL: abd_cmp_v8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm5
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm6
+; AVX1-NEXT: vpsubd %xmm4, %xmm3, %xmm7
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm6, %xmm0
+; AVX1-NEXT: vpsubd %xmm3, %xmm4, %xmm1
+; AVX1-NEXT: vblendvps %xmm5, %xmm1, %xmm7, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v8i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm3
+; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
+; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsubd %ymm0, %ymm1, %ymm2 {%k1}
+; AVX512-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512-NEXT: retq
+ %cmp = icmp slt <8 x i32> %a, %b
+ %ab = sub <8 x i32> %a, %b
+ %ba = sub <8 x i32> %b, %a
+ %sel = select <8 x i1> %cmp, <8 x i32> %ba, <8 x i32> %ab
+ ret <8 x i32> %sel
+}
+
+define <4 x i64> @abd_cmp_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
+; AVX1-LABEL: abd_cmp_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm6
+; AVX1-NEXT: vpsubq %xmm4, %xmm3, %xmm7
+; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvpd %xmm2, %xmm6, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm3, %xmm4, %xmm1
+; AVX1-NEXT: vblendvpd %xmm5, %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm3
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnltq %ymm1, %ymm0, %k1
+; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsubq %ymm0, %ymm1, %ymm2 {%k1}
+; AVX512-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512-NEXT: retq
+ %cmp = icmp sge <4 x i64> %a, %b
+ %ab = sub <4 x i64> %a, %b
+ %ba = sub <4 x i64> %b, %a
+ %sel = select <4 x i1> %cmp, <4 x i64> %ba, <4 x i64> %ab
+ ret <4 x i64> %sel
+}
+
;
; abs(sub_nsw(x, y)) -> abds(a,b)
;
diff --git a/llvm/test/CodeGen/X86/abds-vector-512.ll b/llvm/test/CodeGen/X86/abds-vector-512.ll
index d19ff6edd78b..b21cc31b9d23 100644
--- a/llvm/test/CodeGen/X86/abds-vector-512.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-512.ll
@@ -260,6 +260,102 @@ define <8 x i64> @abd_minmax_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
ret <8 x i64> %sub
}
+;
+; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b)
+;
+
+define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512BW-LABEL: abd_cmp_v64i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm2
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: abd_cmp_v64i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm5
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
+; AVX512DQ-NEXT: vpsubb %ymm2, %ymm3, %ymm5
+; AVX512DQ-NEXT: vpsubb %ymm1, %ymm0, %ymm6
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
+; AVX512DQ-NEXT: vpsubb %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsubb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpternlogq $184, %zmm5, %zmm4, %zmm0
+; AVX512DQ-NEXT: retq
+ %cmp = icmp sgt <64 x i8> %a, %b
+ %ab = sub <64 x i8> %a, %b
+ %ba = sub <64 x i8> %b, %a
+ %sel = select <64 x i1> %cmp, <64 x i8> %ab, <64 x i8> %ba
+ ret <64 x i8> %sel
+}
+
+define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512BW-LABEL: abd_cmp_v32i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpsubw %zmm0, %zmm1, %zmm2
+; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: abd_cmp_v32i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; AVX512DQ-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm5
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
+; AVX512DQ-NEXT: vpsubw %ymm3, %ymm2, %ymm5
+; AVX512DQ-NEXT: vpsubw %ymm1, %ymm0, %ymm6
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
+; AVX512DQ-NEXT: vpsubw %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpsubw %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0
+; AVX512DQ-NEXT: retq
+ %cmp = icmp sge <32 x i16> %a, %b
+ %ab = sub <32 x i16> %a, %b
+ %ba = sub <32 x i16> %b, %a
+ %sel = select <32 x i1> %cmp, <32 x i16> %ab, <32 x i16> %ba
+ ret <32 x i16> %sel
+}
+
+define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; AVX512-LABEL: abd_cmp_v16i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vpsubd %zmm0, %zmm1, %zmm2 {%k1}
+; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %cmp = icmp slt <16 x i32> %a, %b
+ %ab = sub <16 x i32> %a, %b
+ %ba = sub <16 x i32> %b, %a
+ %sel = select <16 x i1> %cmp, <16 x i32> %ba, <16 x i32> %ab
+ ret <16 x i32> %sel
+}
+
+define <8 x i64> @abd_cmp_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; AVX512-LABEL: abd_cmp_v8i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnltq %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vpsubq %zmm0, %zmm1, %zmm2 {%k1}
+; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %cmp = icmp sge <8 x i64> %a, %b
+ %ab = sub <8 x i64> %a, %b
+ %ba = sub <8 x i64> %b, %a
+ %sel = select <8 x i1> %cmp, <8 x i64> %ba, <8 x i64> %ab
+ ret <8 x i64> %sel
+}
+
;
; abs(sub_nsw(x, y)) -> abds(a,b)
;
diff --git a/llvm/test/CodeGen/X86/abdu-vector-128.ll b/llvm/test/CodeGen/X86/abdu-vector-128.ll
index 69c999ae18ae..05a9f3ef7ac0 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-128.ll
@@ -642,6 +642,270 @@ define <2 x i64> @abd_minmax_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
ret <2 x i64> %sub
}
+;
+; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
+;
+
+define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v16i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pminub %xmm1, %xmm2
+; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubb %xmm1, %xmm3
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v16i8:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pminub %xmm1, %xmm2
+; SSE42-NEXT: pcmpeqb %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubb %xmm1, %xmm3
+; SSE42-NEXT: psubb %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm1, %xmm3
+; SSE42-NEXT: movdqa %xmm3, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v16i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubb %xmm0, %xmm1, %xmm3
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp ugt <16 x i8> %a, %b
+ %ab = sub <16 x i8> %a, %b
+ %ba = sub <16 x i8> %b, %a
+ %sel = select <16 x i1> %cmp, <16 x i8> %ab, <16 x i8> %ba
+ ret <16 x i8> %sel
+}
+
+define <8 x i16> @abd_cmp_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v8i16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: psubusw %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubw %xmm1, %xmm3
+; SSE2-NEXT: psubw %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v8i16:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pmaxuw %xmm1, %xmm2
+; SSE42-NEXT: pcmpeqw %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubw %xmm1, %xmm3
+; SSE42-NEXT: psubw %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v8i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v8i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubw %xmm0, %xmm1, %xmm3
+; AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp uge <8 x i16> %a, %b
+ %ab = sub <8 x i16> %a, %b
+ %ba = sub <8 x i16> %b, %a
+ %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
+ ret <8 x i16> %sel
+}
+
+define <4 x i32> @abd_cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v4i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubd %xmm1, %xmm3
+; SSE2-NEXT: psubd %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v4i32:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pmaxud %xmm1, %xmm2
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubd %xmm1, %xmm3
+; SSE42-NEXT: psubd %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: blendvps %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: movaps %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpltud %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubd %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp ult <4 x i32> %a, %b
+ %ab = sub <4 x i32> %a, %b
+ %ba = sub <4 x i32> %b, %a
+ %sel = select <4 x i1> %cmp, <4 x i32> %ba, <4 x i32> %ab
+ ret <4 x i32> %sel
+}
+
+define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v2i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubq %xmm1, %xmm3
+; SSE2-NEXT: psubq %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v2i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: pxor %xmm2, %xmm3
+; SSE42-NEXT: pxor %xmm1, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubq %xmm1, %xmm3
+; SSE42-NEXT: psubq %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnltuq %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubq %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp uge <2 x i64> %a, %b
+ %ab = sub <2 x i64> %a, %b
+ %ba = sub <2 x i64> %b, %a
+ %sel = select <2 x i1> %cmp, <2 x i64> %ba, <2 x i64> %ab
+ ret <2 x i64> %sel
+}
+
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
diff --git a/llvm/test/CodeGen/X86/abdu-vector-256.ll b/llvm/test/CodeGen/X86/abdu-vector-256.ll
index 2c87a3846b44..d4e9d8acc97f 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-256.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-256.ll
@@ -456,6 +456,187 @@ define <4 x i64> @abd_minmax_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
ret <4 x i64> %sub
}
+;
+; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
+;
+
+define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
+; AVX1-LABEL: abd_cmp_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm5
+; AVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm5
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
+; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT: vandnps %ymm5, %ymm4, %ymm1
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm3
+; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsubb %ymm0, %ymm1, %ymm3
+; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm1
+; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512-NEXT: retq
+ %cmp = icmp ugt <32 x i8> %a, %b
+ %ab = sub <32 x i8> %a, %b
+ %ba = sub <32 x i8> %b, %a
+ %sel = select <32 x i1> %cmp, <32 x i8> %ab, <32 x i8> %ba
+ ret <32 x i8> %sel
+}
+
+define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
+; AVX1-LABEL: abd_cmp_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm5
+; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm5
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
+; AVX1-NEXT: vpsubw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandnps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT: vandps %ymm4, %ymm5, %ymm1
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm3
+; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsubw %ymm0, %ymm1, %ymm3
+; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
+; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
+; AVX512-NEXT: retq
+ %cmp = icmp uge <16 x i16> %a, %b
+ %ab = sub <16 x i16> %a, %b
+ %ba = sub <16 x i16> %b, %a
+ %sel = select <16 x i1> %cmp, <16 x i16> %ab, <16 x i16> %ba
+ ret <16 x i16> %sel
+}
+
+define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
+; AVX1-LABEL: abd_cmp_v8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpmaxud %xmm3, %xmm4, %xmm5
+; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm5
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm6
+; AVX1-NEXT: vpsubd %xmm3, %xmm4, %xmm7
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvps %xmm2, %xmm6, %xmm0, %xmm0
+; AVX1-NEXT: vpsubd %xmm4, %xmm3, %xmm1
+; AVX1-NEXT: vblendvps %xmm5, %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v8i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm3
+; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpltud %ymm1, %ymm0, %k1
+; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsubd %ymm0, %ymm1, %ymm2 {%k1}
+; AVX512-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512-NEXT: retq
+ %cmp = icmp ult <8 x i32> %a, %b
+ %ab = sub <8 x i32> %a, %b
+ %ba = sub <8 x i32> %b, %a
+ %sel = select <8 x i1> %cmp, <8 x i32> %ba, <8 x i32> %ab
+ ret <8 x i32> %sel
+}
+
+define <4 x i64> @abd_cmp_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
+; AVX1-LABEL: abd_cmp_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
+; AVX1-NEXT: vpxor %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm5
+; AVX1-NEXT: vpsubq %xmm6, %xmm4, %xmm7
+; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvpd %xmm3, %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm4, %xmm6, %xmm1
+; AVX1-NEXT: vblendvpd %xmm2, %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm3
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vblendvpd %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnltuq %ymm1, %ymm0, %k1
+; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsubq %ymm0, %ymm1, %ymm2 {%k1}
+; AVX512-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512-NEXT: retq
+ %cmp = icmp uge <4 x i64> %a, %b
+ %ab = sub <4 x i64> %a, %b
+ %ba = sub <4 x i64> %b, %a
+ %sel = select <4 x i1> %cmp, <4 x i64> %ba, <4 x i64> %ab
+ ret <4 x i64> %sel
+}
+
declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
diff --git a/llvm/test/CodeGen/X86/abdu-vector-512.ll b/llvm/test/CodeGen/X86/abdu-vector-512.ll
index 915e82f04f96..d10fc73c9946 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-512.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-512.ll
@@ -260,6 +260,106 @@ define <8 x i64> @abd_minmax_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
ret <8 x i64> %sub
}
+;
+; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
+;
+
+define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512BW-LABEL: abd_cmp_v64i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm2
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: abd_cmp_v64i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512DQ-NEXT: vpminub %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpminub %ymm1, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm5
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
+; AVX512DQ-NEXT: vpsubb %ymm2, %ymm3, %ymm5
+; AVX512DQ-NEXT: vpsubb %ymm1, %ymm0, %ymm6
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
+; AVX512DQ-NEXT: vpsubb %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsubb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0
+; AVX512DQ-NEXT: retq
+ %cmp = icmp ugt <64 x i8> %a, %b
+ %ab = sub <64 x i8> %a, %b
+ %ba = sub <64 x i8> %b, %a
+ %sel = select <64 x i1> %cmp, <64 x i8> %ab, <64 x i8> %ba
+ ret <64 x i8> %sel
+}
+
+define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512BW-LABEL: abd_cmp_v32i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpsubw %zmm0, %zmm1, %zmm2
+; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: abd_cmp_v32i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512DQ-NEXT: vpmaxuw %ymm2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpcmpeqw %ymm4, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpmaxuw %ymm1, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm5
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
+; AVX512DQ-NEXT: vpsubw %ymm2, %ymm3, %ymm5
+; AVX512DQ-NEXT: vpsubw %ymm1, %ymm0, %ymm6
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
+; AVX512DQ-NEXT: vpsubw %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsubw %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpternlogq $184, %zmm5, %zmm4, %zmm0
+; AVX512DQ-NEXT: retq
+ %cmp = icmp uge <32 x i16> %a, %b
+ %ab = sub <32 x i16> %a, %b
+ %ba = sub <32 x i16> %b, %a
+ %sel = select <32 x i1> %cmp, <32 x i16> %ab, <32 x i16> %ba
+ ret <32 x i16> %sel
+}
+
+define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; AVX512-LABEL: abd_cmp_v16i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpltud %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vpsubd %zmm0, %zmm1, %zmm2 {%k1}
+; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %cmp = icmp ult <16 x i32> %a, %b
+ %ab = sub <16 x i32> %a, %b
+ %ba = sub <16 x i32> %b, %a
+ %sel = select <16 x i1> %cmp, <16 x i32> %ba, <16 x i32> %ab
+ ret <16 x i32> %sel
+}
+
+define <8 x i64> @abd_cmp_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; AVX512-LABEL: abd_cmp_v8i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnltuq %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vpsubq %zmm0, %zmm1, %zmm2 {%k1}
+; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %cmp = icmp uge <8 x i64> %a, %b
+ %ab = sub <8 x i64> %a, %b
+ %ba = sub <8 x i64> %b, %a
+ %sel = select <8 x i1> %cmp, <8 x i64> %ba, <8 x i64> %ab
+ ret <8 x i64> %sel
+}
+
declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1)
declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1)