[llvm] 35c767a - [x86] add tests for variants of usubsat; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 24 05:20:41 PDT 2021
Author: Sanjay Patel
Date: 2021-10-24T08:08:30-04:00
New Revision: 35c767a685ee454d278e060359423ce0916daf05
URL: https://github.com/llvm/llvm-project/commit/35c767a685ee454d278e060359423ce0916daf05
DIFF: https://github.com/llvm/llvm-project/commit/35c767a685ee454d278e060359423ce0916daf05.diff
LOG: [x86] add tests for variants of usubsat; NFC
Added:
Modified:
llvm/test/CodeGen/X86/psubus.ll
Removed:
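
The new tests cover an add-based variant of the sign-bit usubsat pattern: the existing tests in this file use xor, and since adding 0x8000 to an i16 lane only flips the sign bit (mod 2^16), add and xor are interchangeable there. The undef elements in the new vector constants appear intended to stress matching against partially-undef splats. A minimal scalar sketch of the underlying identity (illustrative C, not part of the commit; the helper names are made up):

#include <assert.h>
#include <stdint.h>

/* Scalar model of the pattern the new tests exercise, per i16 lane.
   The identity being probed:
     usubsat(x, 0x8000) == (x ^ 0x8000) & ashr(x, 15)   -- existing xor form
                        == (x + 0x8000) & ashr(x, 15)   -- new add form
   Adding 0x8000 to an i16 only flips the sign bit, so the add and xor
   variants are the same operation. */

static uint16_t usubsat16(uint16_t x, uint16_t y) {
    return x >= y ? (uint16_t)(x - y) : 0; /* unsigned saturating subtract */
}

static uint16_t ashr15(uint16_t x) {
    /* Arithmetic shift right by 15: yields 0x0000 or 0xFFFF (sign splat).
       Relies on the common arithmetic-shift behavior of signed >>. */
    return (uint16_t)((int16_t)x >> 15);
}

int main(void) {
    for (uint32_t i = 0; i <= UINT16_MAX; ++i) {
        uint16_t x = (uint16_t)i;
        uint16_t expect = usubsat16(x, 0x8000u);
        assert((uint16_t)((x ^ 0x8000u) & ashr15(x)) == expect); /* xor form */
        assert((uint16_t)((x + 0x8000u) & ashr15(x)) == expect); /* add form */
    }
    return 0;
}
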
################################################################################
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 70f1bbb640588..a509fc597f925 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -47,6 +47,27 @@ define <8 x i16> @ashr_xor_and(<8 x i16> %x) nounwind {
ret <8 x i16> %res
}
+define <8 x i16> @ashr_add_and(<8 x i16> %x) nounwind {
+; SSE-LABEL: ashr_add_and:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psraw $15, %xmm1
+; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ashr_add_and:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
+; AVX-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %signsplat = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %flipsign = add <8 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+ %res = and <8 x i16> %signsplat, %flipsign
+ ret <8 x i16> %res
+}
+
; negative test - extra uses may lead to extra instructions when custom-lowered
define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x i8>* %p2) nounwind {
@@ -118,6 +139,42 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
ret <4 x i32> %res
}
+define <4 x i32> @ashr_add_and_custom(<4 x i32> %x) nounwind {
+; SSE-LABEL: ashr_add_and_custom:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: ashr_add_and_custom:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ashr_add_and_custom:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ashr_add_and_custom:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
+ %flipsign = add <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
+ %res = and <4 x i32> %flipsign, %signsplat
+ ret <4 x i32> %res
+}
+
; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))
define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
@@ -368,6 +425,50 @@ define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
ret <16 x i16> %res
}
+define <16 x i16> @ashr_add_and_v16i16(<16 x i16> %x) nounwind {
+; SSE-LABEL: ashr_add_and_v16i16:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psraw $15, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psraw $15, %xmm3
+; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: ashr_add_and_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ashr_add_and_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
+; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ashr_add_and_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraw $15, %ymm0, %ymm1
+; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %flipsign = add <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+ %res = and <16 x i16> %signsplat, %flipsign
+ ret <16 x i16> %res
+}
+
define <16 x i16> @test8(<16 x i16> %x) nounwind {
; SSE-LABEL: test8:
; SSE: # %bb.0: # %vector.ph
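
These are NFC baseline tests: the CHECK lines above record that the add-based sequences currently lower as ashr + add + and rather than a single saturating subtract. Per the identity already noted in this file, usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1)); the add form is equivalent because the addend is exactly the sign bit. A worked i16 example: for x = 0xF000, ashr(x, 15) = 0xFFFF and x + 0x8000 wraps to 0x7000, so the and yields 0x7000 = usubsat(x, 0x8000); for x = 0x1000, ashr(x, 15) = 0x0000 and the result is 0, matching the saturated subtract.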