[llvm] e2faf72 - [x86] add tests for psubus; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 19 13:43:45 PDT 2021
Author: Sanjay Patel
Date: 2021-10-19T16:41:18-04:00
New Revision: e2faf721b2b9597ed4a68f52fdffb14b6cb7db7c
URL: https://github.com/llvm/llvm-project/commit/e2faf721b2b9597ed4a68f52fdffb14b6cb7db7c
DIFF: https://github.com/llvm/llvm-project/commit/e2faf721b2b9597ed4a68f52fdffb14b6cb7db7c.diff
LOG: [x86] add tests for psubus; NFC
Added:
Modified:
llvm/test/CodeGen/X86/psubus.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index d84c873d63ee..197e18331fa8 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -9,6 +9,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
+
define <8 x i16> @test1(<8 x i16> %x) nounwind {
; SSE-LABEL: test1:
; SSE: # %bb.0: # %vector.ph
@@ -90,7 +92,7 @@ define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x
ret <16 x i8> %res
}
-define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x, <4 x i32>* %p1, <4 x i32>* %p2) nounwind {
+define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
; SSE-LABEL: ashr_xor_and_custom:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
@@ -125,6 +127,48 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x, <4 x i32>* %p1, <4 x i32>* %
ret <4 x i32> %res
}
+define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
+; SSE2OR3-LABEL: usubsat_custom:
+; SSE2OR3: # %bb.0:
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT: pxor %xmm0, %xmm1
+; SSE2OR3-NEXT: pxor %xmm2, %xmm2
+; SSE2OR3-NEXT: pcmpgtd %xmm2, %xmm1
+; SSE2OR3-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2OR3-NEXT: pand %xmm1, %xmm0
+; SSE2OR3-NEXT: retq
+;
+; SSE41-LABEL: usubsat_custom:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
+; SSE41-NEXT: pmaxud %xmm1, %xmm0
+; SSE41-NEXT: psubd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: usubsat_custom:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: usubsat_custom:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: usubsat_custom:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
+ ret <4 x i32> %res
+}
+
define <8 x i16> @test2(<8 x i16> %x) nounwind {
; SSE-LABEL: test2:
; SSE: # %bb.0: # %vector.ph
@@ -302,6 +346,47 @@ vector.ph:
ret <16 x i16> %res
}
+define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
+; SSE-LABEL: ashr_xor_and_v16i16:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psraw $15, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psraw $15, %xmm3
+; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: ashr_xor_and_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ashr_xor_and_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
+; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ashr_xor_and_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraw $15, %ymm0, %ymm1
+; AVX512-NEXT: vpternlogq $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %flipsign = xor <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+ %res = and <16 x i16> %signsplat, %flipsign
+ ret <16 x i16> %res
+}
+
define <16 x i16> @test8(<16 x i16> %x) nounwind {
; SSE-LABEL: test8:
; SSE: # %bb.0: # %vector.ph
More information about the llvm-commits
mailing list