[llvm] d91c20a - [X86] Add additional uses tests for select(icmp(x,y),sub(x,y),sub(y,x)) -> abd(x,y) patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 26 10:04:27 PST 2023
Author: Simon Pilgrim
Date: 2023-02-26T18:01:05Z
New Revision: d91c20a0de70bc52695650360048f9a5b19e1664
URL: https://github.com/llvm/llvm-project/commit/d91c20a0de70bc52695650360048f9a5b19e1664
DIFF: https://github.com/llvm/llvm-project/commit/d91c20a0de70bc52695650360048f9a5b19e1664.diff
LOG: [X86] Add additional uses tests for select(icmp(x,y),sub(x,y),sub(y,x)) -> abd(x,y) patterns
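For reference, the base pattern these tests exercise is the absolute-difference idiom named in the title; a minimal IR sketch is below (the function name @abd_pattern is illustrative only, and the new multiuse tests differ by keeping an extra use of the compare or of one sub, as seen in the diff):

    define <8 x i16> @abd_pattern(<8 x i16> %a, <8 x i16> %b) {
      %cmp = icmp sgt <8 x i16> %a, %b
      %ab  = sub <8 x i16> %a, %b
      %ba  = sub <8 x i16> %b, %a
      ; selects the positive difference, i.e. abd(a, b)
      %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
      ret <8 x i16> %sel
    }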
Added:
Modified:
llvm/test/CodeGen/X86/abds-vector-128.ll
llvm/test/CodeGen/X86/abdu-vector-128.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll
index 92a91d09fda6..1b412e4dba2c 100644
--- a/llvm/test/CodeGen/X86/abds-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-128.ll
@@ -1148,6 +1148,153 @@ define <2 x i64> @abd_subnsw_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
ret <2 x i64> %abs
}
+;
+; Special cases
+;
+
+define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psubq %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: psubq %xmm0, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm1
+; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: paddq %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: psubq %xmm1, %xmm2
+; SSE42-NEXT: movdqa %xmm1, %xmm3
+; SSE42-NEXT: psubq %xmm0, %xmm3
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm2
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE42-NEXT: pxor %xmm1, %xmm0
+; SSE42-NEXT: paddq %xmm2, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm1
+; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm3
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnltq %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpsubq %xmm0, %xmm1, %xmm2
+; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp sge <2 x i64> %a, %b
+ %ab = sub <2 x i64> %a, %b
+ %ba = sub <2 x i64> %b, %a
+ %sel = select <2 x i1> %cmp, <2 x i64> %ab, <2 x i64> %ba
+ %ext = sext <2 x i1> %cmp to <2 x i64>
+ %res = add <2 x i64> %ext, %sel
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @abd_cmp_v8i16_multiuse_sub(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v8i16_multiuse_sub:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtw %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubw %xmm1, %xmm3
+; SSE2-NEXT: psubw %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: paddw %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v8i16_multiuse_sub:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pcmpgtw %xmm1, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubw %xmm1, %xmm3
+; SSE42-NEXT: psubw %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: paddw %xmm1, %xmm3
+; SSE42-NEXT: movdqa %xmm3, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpaddw %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubw %xmm0, %xmm1, %xmm3
+; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
+; AVX512-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp sgt <8 x i16> %a, %b
+ %ab = sub <8 x i16> %a, %b
+ %ba = sub <8 x i16> %b, %a
+ %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
+ %res = add <8 x i16> %ab, %sel
+ ret <8 x i16> %res
+}
+
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
diff --git a/llvm/test/CodeGen/X86/abdu-vector-128.ll b/llvm/test/CodeGen/X86/abdu-vector-128.ll
index 05a9f3ef7ac0..d5d1bd775bb6 100644
--- a/llvm/test/CodeGen/X86/abdu-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abdu-vector-128.ll
@@ -906,6 +906,161 @@ define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
ret <2 x i64> %sel
}
+;
+; Special cases
+;
+
+define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psubq %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: psubq %xmm0, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: pandn %xmm3, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: paddq %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: psubq %xmm1, %xmm2
+; SSE42-NEXT: movdqa %xmm1, %xmm3
+; SSE42-NEXT: psubq %xmm0, %xmm3
+; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: pxor %xmm4, %xmm1
+; SSE42-NEXT: pxor %xmm4, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
+; SSE42-NEXT: paddq %xmm3, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm1
+; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm3
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm1
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpsubq %xmm0, %xmm1, %xmm2
+; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpaddq %xmm0, %xmm2, %xmm2 {%k1}
+; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp ugt <2 x i64> %a, %b
+ %ab = sub <2 x i64> %a, %b
+ %ba = sub <2 x i64> %b, %a
+ %sel = select <2 x i1> %cmp, <2 x i64> %ab, <2 x i64> %ba
+ %ext = sext <2 x i1> %cmp to <2 x i64>
+ %res = add <2 x i64> %ext, %sel
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @abd_cmp_v8i16_multiuse_sub(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: abd_cmp_v8i16_multiuse_sub:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: psubusw %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubw %xmm1, %xmm3
+; SSE2-NEXT: psubw %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: paddw %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: abd_cmp_v8i16_multiuse_sub:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pmaxuw %xmm1, %xmm2
+; SSE42-NEXT: pcmpeqw %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: psubw %xmm1, %xmm3
+; SSE42-NEXT: psubw %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
+; SSE42-NEXT: paddw %xmm1, %xmm3
+; SSE42-NEXT: movdqa %xmm3, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm3
+; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpaddw %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: abd_cmp_v8i16_multiuse_sub:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpsubw %xmm0, %xmm1, %xmm3
+; AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
+; AVX512-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %cmp = icmp uge <8 x i16> %a, %b
+ %ab = sub <8 x i16> %a, %b
+ %ba = sub <8 x i16> %b, %a
+ %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
+ %res = add <8 x i16> %ab, %sel
+ ret <8 x i16> %res
+}
+
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)