[llvm] 35c767a - [x86] add tests for variants of usubsat; NFC

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 24 05:20:41 PDT 2021


Author: Sanjay Patel
Date: 2021-10-24T08:08:30-04:00
New Revision: 35c767a685ee454d278e060359423ce0916daf05

URL: https://github.com/llvm/llvm-project/commit/35c767a685ee454d278e060359423ce0916daf05
DIFF: https://github.com/llvm/llvm-project/commit/35c767a685ee454d278e060359423ce0916daf05.diff

LOG: [x86] add tests for variants of usubsat; NFC

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/psubus.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 70f1bbb640588..a509fc597f925 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -47,6 +47,27 @@ define <8 x i16> @ashr_xor_and(<8 x i16> %x) nounwind {
   ret <8 x i16> %res
 }
 
+define <8 x i16> @ashr_add_and(<8 x i16> %x) nounwind {
+; SSE-LABEL: ashr_add_and:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psraw $15, %xmm1
+; SSE-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ashr_add_and:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
+; AVX-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %signsplat = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %flipsign = add <8 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+  %res = and <8 x i16> %signsplat, %flipsign
+  ret <8 x i16> %res
+}
+
 ; negative test - extra uses may lead to extra instructions when custom-lowered
 
 define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x i8>* %p2) nounwind {
@@ -118,6 +139,42 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
   ret <4 x i32> %res
 }
 
+define <4 x i32> @ashr_add_and_custom(<4 x i32> %x) nounwind {
+; SSE-LABEL: ashr_add_and_custom:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: ashr_add_and_custom:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ashr_add_and_custom:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: ashr_add_and_custom:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsrad $31, %xmm0, %xmm1
+; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
+  %flipsign = add <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
+  %res = and <4 x i32> %flipsign, %signsplat
+  ret <4 x i32> %res
+}
+
 ; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))
 
 define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
@@ -368,6 +425,50 @@ define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
   ret <16 x i16> %res
 }
 
+define <16 x i16> @ashr_add_and_v16i16(<16 x i16> %x) nounwind {
+; SSE-LABEL: ashr_add_and_v16i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    psraw $15, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    psraw $15, %xmm3
+; SSE-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    pand %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: ashr_add_and_v16i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsraw $15, %xmm2, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ashr_add_and_v16i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
+; AVX2-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: ashr_add_and_v16i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm1
+; AVX512-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX512-NEXT:    retq
+  %signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %flipsign = add <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+  %res = and <16 x i16> %signsplat, %flipsign
+  ret <16 x i16> %res
+}
+
 define <16 x i16> @test8(<16 x i16> %x) nounwind {
 ; SSE-LABEL: test8:
 ; SSE:       # %bb.0: # %vector.ph


        


More information about the llvm-commits mailing list