[llvm] abf6692 - Tests for (and/or (icmp eq/ne A, C), (icmp eq/ne A, -C)) <--> (icmp eq/ne (ABS A), ABS(C)); NFC
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 16:59:25 PST 2023
Author: Noah Goldstein
Date: 2023-02-14T18:59:04-06:00
New Revision: abf6692f951339872fa78ac7a607c4a11a12b2e2
URL: https://github.com/llvm/llvm-project/commit/abf6692f951339872fa78ac7a607c4a11a12b2e2
DIFF: https://github.com/llvm/llvm-project/commit/abf6692f951339872fa78ac7a607c4a11a12b2e2.diff
LOG: Tests for (and/or (icmp eq/ne A, C), (icmp eq/ne A, -C)) <--> (icmp eq/ne (ABS A), ABS(C)); NFC
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D142600
Added:
llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
llvm/test/CodeGen/X86/icmp-abs-C.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
new file mode 100644
index 000000000000..90e6e5becd49
--- /dev/null
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -0,0 +1,1178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
+
+define <4 x i1> @illegal_abs_unchanged(<4 x i8> %x) {
+; AVX512-LABEL: illegal_abs_unchanged:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsb %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_unchanged:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsb %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_unchanged:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsb %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_unchanged:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: pminub %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: retq
+ %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true)
+ %cmp = icmp eq <4 x i8> %abs, <i8 129, i8 129, i8 129, i8 129>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @illegal_abs_unchanged2(<4 x i8> %x) {
+; AVX512-LABEL: illegal_abs_unchanged2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsb %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_unchanged2:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsb %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_unchanged2:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsb %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_unchanged2:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: pminub %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: retq
+ %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true)
+ %cmp = icmp ne <4 x i8> %abs, <i8 129, i8 129, i8 129, i8 129>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @illegal_abs_to_eq_or(<4 x i64> %x) {
+; AVX512-LABEL: illegal_abs_to_eq_or:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_to_eq_or:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_to_eq_or:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: psubq %xmm0, %xmm4
+; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
+; SSE41-NEXT: psubq %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: packssdw %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_to_eq_or:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: packssdw %xmm3, %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
+ %cmp = icmp eq <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i64> @illegal_abs_to_eq_or_sext(<4 x i64> %x) {
+; AVX512-LABEL: illegal_abs_to_eq_or_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_to_eq_or_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_to_eq_or_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: psubq %xmm1, %xmm4
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
+; SSE41-NEXT: psubq %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_to_eq_or_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: retq
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
+ %cmp = icmp eq <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
+ %r = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %r
+}
+
+define <4 x i1> @illegal_abs_to_ne_and(<4 x i64> %x) {
+; AVX512-LABEL: illegal_abs_to_ne_and:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_to_ne_and:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_to_ne_and:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: psubq %xmm0, %xmm4
+; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
+; SSE41-NEXT: psubq %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm2
+; SSE41-NEXT: packssdw %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_to_ne_and:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: packssdw %xmm3, %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
+ %cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i64> @illegal_abs_to_ne_and_sext(<4 x i64> %x) {
+; AVX512-LABEL: illegal_abs_to_ne_and_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_to_ne_and_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_to_ne_and_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: psubq %xmm1, %xmm4
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
+; SSE41-NEXT: psubq %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm3, %xmm2
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_to_ne_and_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm3, %xmm1
+; SSE2-NEXT: retq
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
+ %cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
+ %r = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %r
+}
+
+define <4 x i1> @legal_abs_eq_unchanged(<4 x i32> %x) {
+; AVX512-LABEL: legal_abs_eq_unchanged:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: legal_abs_eq_unchanged:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: legal_abs_eq_unchanged:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsd %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: legal_abs_eq_unchanged:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
+ %cmp = icmp eq <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i32> @legal_abs_eq_unchanged_sext(<4 x i32> %x) {
+; AVX512-LABEL: legal_abs_eq_unchanged_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: legal_abs_eq_unchanged_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: legal_abs_eq_unchanged_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsd %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: legal_abs_eq_unchanged_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
+ %cmp = icmp eq <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
+ %r = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %r
+}
+
+define <4 x i1> @legal_abs_ne_unchangedd(<4 x i32> %x) {
+; AVX512-LABEL: legal_abs_ne_unchangedd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: legal_abs_ne_unchangedd:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: legal_abs_ne_unchangedd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: legal_abs_ne_unchangedd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
+ %cmp = icmp ne <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i32> @legal_abs_ne_unchangedd_sext(<4 x i32> %x) {
+; AVX512-LABEL: legal_abs_ne_unchangedd_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: legal_abs_ne_unchangedd_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: legal_abs_ne_unchangedd_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: legal_abs_ne_unchangedd_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
+ %cmp = icmp ne <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
+ %r = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %r
+}
+
+define <4 x i1> @eq_or_to_abs_vec4x64(<4 x i64> %x) {
+; AVX512-LABEL: eq_or_to_abs_vec4x64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
+; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm3, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: orps %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm3, %xmm0
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
+ %cmp2 = icmp eq <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
+ %cmp = or <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %cmp
+}
+
+define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
+; AVX512-LABEL: eq_or_to_abs_vec4x64_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x64_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
+; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x64_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm3, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: orps %xmm2, %xmm0
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE41-NEXT: psllq $63, %xmm0
+; SSE41-NEXT: psrad $31, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x64_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm3, %xmm0
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: psllq $63, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
+ %cmp2 = icmp eq <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
+ %cmp = or <4 x i1> %cmp1, %cmp2
+ %r = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %r
+}
+
+define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
+; AVX512-LABEL: ne_and_to_abs_vec4x64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 {%k1}
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
+; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: xorps %xmm3, %xmm0
+; SSE41-NEXT: andnps %xmm0, %xmm2
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm0
+; SSE2-NEXT: xorps %xmm3, %xmm0
+; SSE2-NEXT: andnps %xmm0, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
+ %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
+ %cmp = and <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %cmp
+}
+
+define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
+; AVX512-LABEL: ne_and_to_abs_vec4x64_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 {%k1}
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x64_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
+; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x64_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: xorps %xmm3, %xmm0
+; SSE41-NEXT: andnps %xmm0, %xmm2
+; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
+; SSE41-NEXT: psllq $63, %xmm1
+; SSE41-NEXT: psrad $31, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x64_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: xorps %xmm3, %xmm2
+; SSE2-NEXT: andnps %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: psllq $63, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
+ %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
+ %cmp = and <4 x i1> %cmp1, %cmp2
+ %r = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %r
+}
+
+define <4 x i1> @eq_or_to_abs_vec4x32(<4 x i32> %x) { ; Per-lane (x == 1) | (x == -1) on <4 x i32>, returned as a <4 x i1> mask; the recorded checks use two compares plus an OR on every prefix.
+; AVX512-LABEL: eq_or_to_abs_vec4x32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> ; lanes equal to C = 1
+ %cmp2 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; lanes equal to -C = -1
+ %cmp = or <4 x i1> %cmp1, %cmp2 ; the (or (icmp eq A, C), (icmp eq A, -C)) shape named in the commit title
+ ret <4 x i1> %cmp
+}
+
+define <4 x i32> @eq_or_to_abs_vec4x32_sext(<4 x i32> %x) { ; Per-lane (x == 1) | (x == -1) on <4 x i32>, with the mask sign-extended back to <4 x i32> (all-ones / zero lanes).
+; AVX512-LABEL: eq_or_to_abs_vec4x32_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x32_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x32_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x32_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> ; lanes equal to C = 1
+ %cmp2 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; lanes equal to -C = -1
+ %cmp = or <4 x i1> %cmp1, %cmp2 ; the (or (icmp eq A, C), (icmp eq A, -C)) shape named in the commit title
+ %r = sext <4 x i1> %cmp to <4 x i32> ; widen the mask to the element width; the recorded checks add no instruction for this step
+ ret <4 x i32> %r
+}
+
+define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) { ; Per-lane (x != 1) & (x != -1) on <4 x i32>, returned as a <4 x i1> mask; the recorded checks still compare against both constants on every prefix.
+; AVX512-LABEL: ne_and_to_abs_vec4x32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 {%k1}
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> ; lanes different from C = 1
+ %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; lanes different from -C = -1
+ %cmp = and <4 x i1> %cmp1, %cmp2 ; the (and (icmp ne A, C), (icmp ne A, -C)) shape named in the commit title
+ ret <4 x i1> %cmp
+}
+
+define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) { ; Per-lane (x != 1) & (x != -1) on <4 x i32>, with the mask sign-extended back to <4 x i32> (all-ones / zero lanes).
+; AVX512-LABEL: ne_and_to_abs_vec4x32_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 {%k1}
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x32_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x32_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x32_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> ; lanes different from C = 1
+ %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; lanes different from -C = -1
+ %cmp = and <4 x i1> %cmp1, %cmp2 ; the (and (icmp ne A, C), (icmp ne A, -C)) shape named in the commit title
+ %r = sext <4 x i1> %cmp to <4 x i32> ; widen the mask to the element width; the recorded checks add no instruction for this step
+ ret <4 x i32> %r
+}
+
+define <4 x i1> @eq_or_to_abs_vec4x16(<4 x i16> %x) { ; Per-lane (x == 88) | (x == -88) on <4 x i16>, returned as a <4 x i1> mask; the recorded checks keep two word compares and extend each result before the OR.
+; AVX512-LABEL: eq_or_to_abs_vec4x16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX512-NEXT: vptestmd %ymm1, %ymm1, %k0
+; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpmovsxwd %xmm1, %xmm1
+; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x16:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
+; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
+; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
+; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
+; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88> ; lanes equal to C = 88
+ %cmp2 = icmp eq <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88> ; lanes equal to -C = -88
+ %cmp = or <4 x i1> %cmp1, %cmp2 ; the (or (icmp eq A, C), (icmp eq A, -C)) shape named in the commit title
+ ret <4 x i1> %cmp
+}
+
+define <4 x i8> @eq_or_to_abs_vec4x8_sext(<4 x i8> %x) { ; Per-lane (x == 88) | (x == -88) on <4 x i8>, with the mask sign-extended back to <4 x i8>; the recorded checks keep two byte compares on every prefix.
+; AVX512-LABEL: eq_or_to_abs_vec4x8_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x8_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x8_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x8_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88> ; lanes equal to C = 88
+ %cmp2 = icmp eq <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88> ; lanes equal to -C = -88
+ %cmp = or <4 x i1> %cmp1, %cmp2 ; the (or (icmp eq A, C), (icmp eq A, -C)) shape named in the commit title
+ %r = sext <4 x i1> %cmp to <4 x i8> ; widen the mask back to the i8 element width
+ ret <4 x i8> %r
+}
+
+define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) { ; Per-lane (x != 88) & (x != -88) on <4 x i8>, returned as a <4 x i1> mask; the recorded checks compare, invert and extend each side separately before the AND.
+; AVX512-LABEL: ne_and_to_abs_vec4x8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: kandw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: pmovsxbd %xmm1, %xmm1
+; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
+; SSE41-NEXT: pand %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: psrad $24, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88> ; lanes different from C = 88
+ %cmp2 = icmp ne <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88> ; lanes different from -C = -88
+ %cmp = and <4 x i1> %cmp1, %cmp2 ; the (and (icmp ne A, C), (icmp ne A, -C)) shape named in the commit title
+ ret <4 x i1> %cmp
+}
+
+define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) { ; Per-lane (x != 88) & (x != -88) on <4 x i16>, with the mask sign-extended back to <4 x i16>; the recorded checks keep two word compares on every prefix.
+; AVX512-LABEL: ne_and_to_abs_vec4x16_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX512-NEXT: vptestmd %ymm1, %ymm1, %k0
+; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1
+; AVX512-NEXT: kandw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x16_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x16_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
+; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x16_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
+; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88> ; lanes different from C = 88
+ %cmp2 = icmp ne <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88> ; lanes different from -C = -88
+ %cmp = and <4 x i1> %cmp1, %cmp2 ; the (and (icmp ne A, C), (icmp ne A, -C)) shape named in the commit title
+ %r = sext <4 x i1> %cmp to <4 x i16> ; widen the mask back to the i16 element width
+ ret <4 x i16> %r
+}
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C.ll b/llvm/test/CodeGen/X86/icmp-abs-C.ll
new file mode 100644
index 000000000000..842868d46da4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/icmp-abs-C.ll
@@ -0,0 +1,222 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64
+
+declare i64 @llvm.abs.i64(i64, i1) ; scalar abs intrinsics called by the tests in this file; per the LLVM LangRef the i1 operand is the is_int_min_poison flag
+declare i32 @llvm.abs.i32(i32, i1) ; called by eq_or_with_dom_abs_non_po2
+declare i16 @llvm.abs.i16(i16, i1) ; called by ne_and_with_dom_abs
+declare i8 @llvm.abs.i8(i8, i1) ; called by ne_and_with_dom_abs_non_pow2
+
+define i64 @eq_or_with_dom_abs(i64 %x) nounwind { ; Scalar i64 case of (x == 64) | (x == -64) in a function that already computes abs(x); 'dom' presumably means the abs dominates the compares - confirm. The X64 checks fold the pair into add $64 / test $-129 / sete.
+; X86-LABEL: eq_or_with_dom_abs:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: xorl %edi, %eax
+; X86-NEXT: subl %edi, %eax
+; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: xorl $12312, %eax # imm = 0x3018
+; X86-NEXT: addl $64, %ecx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: andl $-129, %ecx
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: sete %cl
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: movl $2344, %edi # imm = 0x928
+; X86-NEXT: cmpl %eax, %edi
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: jb .LBB0_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movb %cl, %bl
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: .LBB0_2:
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-LABEL: eq_or_with_dom_abs:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rcx
+; X64-NEXT: negq %rcx
+; X64-NEXT: cmovsq %rdi, %rcx
+; X64-NEXT: xorq $12312, %rcx # imm = 0x3018
+; X64-NEXT: addq $64, %rdi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testq $-129, %rdi
+; X64-NEXT: sete %al
+; X64-NEXT: cmpq $2345, %rcx # imm = 0x929
+; X64-NEXT: cmovaeq %rcx, %rax
+; X64-NEXT: retq
+ %absx = call i64 @llvm.abs.i64(i64 %x, i1 true) ; abs(x) computed before the compare pair
+ %foo = xor i64 %absx, 12312 ; extra user of abs(x); this is the value returned when %bar is true
+ %bar = icmp ugt i64 %foo, 2344 ; unsigned select condition (emitted as a compare against 2345 in the X64 checks)
+ %cmp0 = icmp eq i64 %x, 64 ; x == C
+ %cmp1 = icmp eq i64 %x, -64 ; x == -C
+ %cmp = or i1 %cmp0, %cmp1 ; the (or (icmp eq A, C), (icmp eq A, -C)) shape named in the commit title
+ %cmp64 = zext i1 %cmp to i64 ; widen the i1 to the return type (0 or 1)
+ %r = select i1 %bar, i64 %foo, i64 %cmp64 ; both abs(x) and the compare pair feed the result
+ ret i64 %r
+}
+
+define i32 @eq_or_with_dom_abs_non_po2(i32 %x) nounwind { ; Scalar i32 case of (x == 123) | (x == -123) alongside an existing abs(x); the checks keep two cmp/sete pairs joined by orb. NOTE(review): suffix is spelled non_po2 here but non_pow2 on the i8 test in this file.
+; X86-LABEL: eq_or_with_dom_abs_non_po2:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: xorl $12312, %eax # imm = 0x3018
+; X86-NEXT: cmpl $123, %edx
+; X86-NEXT: sete %cl
+; X86-NEXT: cmpl $-123, %edx
+; X86-NEXT: sete %dl
+; X86-NEXT: cmpl $2345, %eax # imm = 0x929
+; X86-NEXT: jae .LBB1_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: orb %dl, %cl
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: .LBB1_2:
+; X86-NEXT: retl
+;
+; X64-LABEL: eq_or_with_dom_abs_non_po2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: cmovsl %edi, %ecx
+; X64-NEXT: xorl $12312, %ecx # imm = 0x3018
+; X64-NEXT: cmpl $123, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: cmpl $-123, %edi
+; X64-NEXT: sete %dl
+; X64-NEXT: orb %al, %dl
+; X64-NEXT: cmpl $2345, %ecx # imm = 0x929
+; X64-NEXT: movzbl %dl, %eax
+; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: retq
+ %absx = call i32 @llvm.abs.i32(i32 %x, i1 true) ; abs(x) computed before the compare pair
+ %foo = xor i32 %absx, 12312 ; extra user of abs(x); this is the value returned when %bar is true
+ %bar = icmp ugt i32 %foo, 2344 ; unsigned select condition (emitted as a compare against 2345 in the checks)
+ %cmp0 = icmp eq i32 %x, 123 ; x == C, with C not a power of two
+ %cmp1 = icmp eq i32 %x, -123 ; x == -C
+ %cmp = or i1 %cmp0, %cmp1 ; the (or (icmp eq A, C), (icmp eq A, -C)) shape named in the commit title
+ %cmp64 = zext i1 %cmp to i32 ; NOTE(review): value is i32 despite the 64 in its name
+ %r = select i1 %bar, i32 %foo, i32 %cmp64 ; both abs(x) and the compare pair feed the result
+ ret i32 %r
+}
+
+define i8 @ne_and_with_dom_abs_non_pow2(i8 %x) nounwind { ; Scalar i8 case of (x != 121) & (x != -121) alongside an existing abs(x); the checks keep two cmpb/setne pairs joined by andb.
+; X86-LABEL: ne_and_with_dom_abs_non_pow2:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: sarb $7, %cl
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: xorb %cl, %al
+; X86-NEXT: subb %cl, %al
+; X86-NEXT: xorb $12, %al
+; X86-NEXT: cmpb $121, %dl
+; X86-NEXT: setne %cl
+; X86-NEXT: cmpb $-121, %dl
+; X86-NEXT: setne %dl
+; X86-NEXT: cmpb $24, %al
+; X86-NEXT: jae .LBB2_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: andb %dl, %cl
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: .LBB2_2:
+; X86-NEXT: retl
+;
+; X64-LABEL: ne_and_with_dom_abs_non_pow2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: sarb $7, %al
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: xorb %al, %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: xorb $12, %cl
+; X64-NEXT: cmpb $121, %dil
+; X64-NEXT: setne %al
+; X64-NEXT: cmpb $-121, %dil
+; X64-NEXT: setne %dl
+; X64-NEXT: andb %al, %dl
+; X64-NEXT: cmpb $24, %cl
+; X64-NEXT: movzbl %dl, %edx
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: cmovbl %edx, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %absx = call i8 @llvm.abs.i8(i8 %x, i1 true) ; abs(x) computed before the compare pair
+ %foo = xor i8 %absx, 12 ; extra user of abs(x); this is the value returned when %bar is true
+ %bar = icmp ugt i8 %foo, 23 ; unsigned select condition (emitted as a compare against 24 in the checks)
+ %cmp0 = icmp ne i8 %x, 121 ; x != C, with C not a power of two
+ %cmp1 = icmp ne i8 %x, -121 ; x != -C
+ %cmp = and i1 %cmp0, %cmp1 ; the (and (icmp ne A, C), (icmp ne A, -C)) shape named in the commit title
+ %cmp64 = zext i1 %cmp to i8 ; NOTE(review): value is i8 despite the 64 in its name
+ %r = select i1 %bar, i8 %foo, i8 %cmp64 ; both abs(x) and the compare pair feed the result
+ ret i8 %r
+}
+
+define i16 @ne_and_with_dom_abs(i16 %x) nounwind { ; Scalar i16 case of (x != 64) & (x != -64) alongside an existing abs(x); the checks fold the pair into add $64 / test $65407 (0xFF7F) / setne.
+; X86-LABEL: ne_and_with_dom_abs:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movswl %cx, %edx
+; X86-NEXT: sarl $15, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: subl %edx, %eax
+; X86-NEXT: xorl $12312, %eax # imm = 0x3018
+; X86-NEXT: movzwl %ax, %esi
+; X86-NEXT: addl $64, %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: testl $65407, %ecx # imm = 0xFF7F
+; X86-NEXT: setne %cl
+; X86-NEXT: cmpl $2345, %esi # imm = 0x929
+; X86-NEXT: jae .LBB3_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movb %cl, %dl
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: .LBB3_2:
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: ne_and_with_dom_abs:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negw %cx
+; X64-NEXT: cmovsw %di, %cx
+; X64-NEXT: xorl $12312, %ecx # imm = 0x3018
+; X64-NEXT: movzwl %cx, %edx
+; X64-NEXT: addl $64, %edi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testl $65407, %edi # imm = 0xFF7F
+; X64-NEXT: setne %al
+; X64-NEXT: cmpl $2345, %edx # imm = 0x929
+; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %absx = call i16 @llvm.abs.i16(i16 %x, i1 true) ; abs(x) computed before the compare pair
+ %foo = xor i16 %absx, 12312 ; extra user of abs(x); this is the value returned when %bar is true
+ %bar = icmp ugt i16 %foo, 2344 ; unsigned select condition (emitted as a compare against 2345 in the checks)
+ %cmp0 = icmp ne i16 %x, 64 ; x != C
+ %cmp1 = icmp ne i16 %x, -64 ; x != -C
+ %cmp = and i1 %cmp0, %cmp1 ; the (and (icmp ne A, C), (icmp ne A, -C)) shape named in the commit title
+ %cmp64 = zext i1 %cmp to i16 ; NOTE(review): value is i16 despite the 64 in its name
+ %r = select i1 %bar, i16 %foo, i16 %cmp64 ; both abs(x) and the compare pair feed the result
+ ret i16 %r
+}
More information about the llvm-commits
mailing list