[llvm] d9986fb - [X86] Add tests showing failure to pull common shuffles through add/sub sat intrinsics

Wed Apr 13 04:35:49 PDT 2022

Author: Simon Pilgrim
Date: 2022-04-13T12:35:38+01:00
New Revision: d9986fbfb6b741d4f8c261e122db8590f3eed820

URL: https://github.com/llvm/llvm-project/commit/d9986fbfb6b741d4f8c261e122db8590f3eed820
DIFF: https://github.com/llvm/llvm-project/commit/d9986fbfb6b741d4f8c261e122db8590f3eed820.diff

LOG: [X86] Add tests showing failure to pull common shuffles through add/sub sat intrinsics

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/combine-add-ssat.ll
    llvm/test/CodeGen/X86/combine-add-usat.ll
    llvm/test/CodeGen/X86/combine-sub-ssat.ll
    llvm/test/CodeGen/X86/combine-sub-usat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll
index 277a685131bf8..2da2622f802a9 100644

--- a/llvm/test/CodeGen/X86/combine-add-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll
@@ -159,3 +159,24 @@ define <8 x i16> @combine_no_overflow_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
   %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
   ret <8 x i16> %3
 }
+
+; fold (sadd_sat (shuffle x, u, m), (shuffle y, u, m)) -> (shuffle (sadd_sat x, y), u, m)
+define <8 x i16> @combine_shuffle_shuffle_v8i16(<8 x i16> %x0, <8 x i16> %y0) {
+; SSE-LABEL: combine_shuffle_shuffle_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    paddsw %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_shuffle_shuffle_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %x1= shufflevector <8 x i16> %x0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  %y1 = shufflevector <8 x i16> %y0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  %res = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x1, <8 x i16> %y1)
+  ret <8 x i16> %res
+}

diff  --git a/llvm/test/CodeGen/X86/combine-add-usat.ll b/llvm/test/CodeGen/X86/combine-add-usat.ll
index 02937ea84f05f..1283a14f077bb 100644
--- a/llvm/test/CodeGen/X86/combine-add-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-usat.ll
@@ -152,3 +152,24 @@ define <8 x i16> @combine_no_overflow_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
   %3 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
   ret <8 x i16> %3
 }
+
+; fold (uadd_sat (shuffle x, u, m), (shuffle y, u, m)) -> (shuffle (uadd_sat x, y), u, m)
+define <8 x i16> @combine_shuffle_shuffle_v8i16(<8 x i16> %x0, <8 x i16> %y0) {
+; SSE-LABEL: combine_shuffle_shuffle_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    paddusw %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_shuffle_shuffle_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %x1= shufflevector <8 x i16> %x0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  %y1 = shufflevector <8 x i16> %y0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  %res = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x1, <8 x i16> %y1)
+  ret <8 x i16> %res
+}

diff  --git a/llvm/test/CodeGen/X86/combine-sub-ssat.ll b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
index a7a0546d11e6b..812d1b9e5ac55 100644
--- a/llvm/test/CodeGen/X86/combine-sub-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
@@ -114,3 +114,24 @@ define <8 x i16> @combine_self_v8i16(<8 x i16> %a0) {
   %1 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a0)
   ret <8 x i16> %1
 }
+
+; fold (ssub_sat (shuffle x, u, m), (shuffle y, u, m)) -> (shuffle (ssub_sat x, y), u, m)
+define <8 x i16> @combine_shuffle_shuffle_v8i16(<8 x i16> %x0, <8 x i16> %y0) {
+; SSE-LABEL: combine_shuffle_shuffle_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    psubsw %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_shuffle_shuffle_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %x1= shufflevector <8 x i16> %x0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  %y1 = shufflevector <8 x i16> %y0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  %res = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x1, <8 x i16> %y1)
+  ret <8 x i16> %res
+}

diff  --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll
index 10d483a5ad650..594153ac3ef86 100644
--- a/llvm/test/CodeGen/X86/combine-sub-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll
@@ -244,3 +244,24 @@ define <8 x i16> @combine_trunc_v8i32_v8i16(<8 x i16> %a0, <8 x i32> %a1) {
   %3 = trunc <8 x i32> %2 to <8 x i16>
   ret <8 x i16> %3
 }
+
+; fold (usub_sat (shuffle x, u, m), (shuffle y, u, m)) -> (shuffle (usub_sat x, y), u, m)
+define <8 x i16> @combine_shuffle_shuffle_v8i16(<8 x i16> %x0, <8 x i16> %y0) {
+; SSE-LABEL: combine_shuffle_shuffle_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    psubusw %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_shuffle_shuffle_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %x1= shufflevector <8 x i16> %x0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  %y1 = shufflevector <8 x i16> %y0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  %res = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x1, <8 x i16> %y1)
+  ret <8 x i16> %res
+}