[llvm] 35c767a - [x86] add tests for variants of usubsat; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 24 05:20:41 PDT 2021
Author: Sanjay Patel
Date: 2021-10-24T08:08:30-04:00
New Revision: 35c767a685ee454d278e060359423ce0916daf05
URL: https://github.com/llvm/llvm-project/commit/35c767a685ee454d278e060359423ce0916daf05
DIFF: https://github.com/llvm/llvm-project/commit/35c767a685ee454d278e060359423ce0916daf05.diff
LOG: [x86] add tests for variants of usubsat; NFC
Added:
Modified:
llvm/test/CodeGen/X86/psubus.ll
Removed:
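
The new tests cover an add-based variant of the sign-bit usubsat pattern: the existing tests in this file use xor, and since adding 0x8000 to an i16 lane only flips the sign bit (mod 2^16), add and xor are interchangeable there. The undef elements in the new vector constants appear intended to stress matching against partially-undef splats. A minimal scalar sketch of the underlying identity (illustrative C, not part of the commit; the helper names are made up):

#include <assert.h>
#include <stdint.h>

/* Scalar model of the pattern the new tests exercise, per i16 lane.
   The identity being probed:
     usubsat(x, 0x8000) == (x ^ 0x8000) & ashr(x, 15)   -- existing xor form
                        == (x + 0x8000) & ashr(x, 15)   -- new add form
   Adding 0x8000 to an i16 only flips the sign bit, so the add and xor
   variants are the same operation. */

static uint16_t usubsat16(uint16_t x, uint16_t y) {
    return x >= y ? (uint16_t)(x - y) : 0; /* unsigned saturating subtract */
}

static uint16_t ashr15(uint16_t x) {
    /* Arithmetic shift right by 15: yields 0x0000 or 0xFFFF (sign splat).
       Relies on the common arithmetic-shift behavior of signed >>. */
    return (uint16_t)((int16_t)x >> 15);
}

int main(void) {
    for (uint32_t i = 0; i <= UINT16_MAX; ++i) {
        uint16_t x = (uint16_t)i;
        uint16_t expect = usubsat16(x, 0x8000u);
        assert((uint16_t)((x ^ 0x8000u) & ashr15(x)) == expect); /* xor form */
        assert((uint16_t)((x + 0x8000u) & ashr15(x)) == expect); /* add form */
    }
    return 0;
}
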
################################################################################
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 70f1bbb640588..a509fc597f925 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -47,6 +47,27 @@ define <8 x i16> @ashr_xor_and(<8 x i16> %x) nounwind {
ret <8 x i16> %res
}
+define <8 x i16> @ashr_add_and(<8 x i16> %x) nounwind {
+; SSE-LABEL: ashr_add_and:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psraw $15, %xmm1
+; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ashr_add_and:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
+; AVX-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %signsplat = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %flipsign = add <8 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+ %res = and <8 x i16> %signsplat, %flipsign
+ ret <8 x i16> %res
+}
+
; negative test - extra uses may lead to extra instructions when custom-lowered
define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x i8>* %p2) nounwind {
@@ -118,6 +139,42 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
ret <4 x i32> %res
}
+define <4 x i32> @ashr_add_and_custom(<4 x i32> %x) nounwind {
+; SSE-LABEL: ashr_add_and_custom:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: ashr_add_and_custom:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ashr_add_and_custom:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ashr_add_and_custom:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
+ %flipsign = add <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
+ %res = and <4 x i32> %flipsign, %signsplat
+ ret <4 x i32> %res
+}
+
; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))
define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
@@ -368,6 +425,50 @@ define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
ret <16 x i16> %res
}
+define <16 x i16> @ashr_add_and_v16i16(<16 x i16> %x) nounwind {
+; SSE-LABEL: ashr_add_and_v16i16:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psraw $15, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psraw $15, %xmm3
+; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: ashr_add_and_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ashr_add_and_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
+; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ashr_add_and_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraw $15, %ymm0, %ymm1
+; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %flipsign = add <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+ %res = and <16 x i16> %signsplat, %flipsign
+ ret <16 x i16> %res
+}
+
define <16 x i16> @test8(<16 x i16> %x) nounwind {
; SSE-LABEL: test8:
; SSE: # %bb.0: # %vector.ph
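
These are NFC baseline tests: the CHECK lines above record that the add-based sequences currently lower as ashr + add + and rather than a single saturating subtract. Per the identity already noted in this file, usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1)); the add form is equivalent because the addend is exactly the sign bit. A worked i16 example: for x = 0xF000, ashr(x, 15) = 0xFFFF and x + 0x8000 wraps to 0x7000, so the and yields 0x7000 = usubsat(x, 0x8000); for x = 0x1000, ashr(x, 15) = 0x0000 and the result is 0, matching the saturated subtract.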