[llvm] db50b77 - [X86] matchBinaryShuffle - match PACKSS for v2i64 -> v4i32 all-signbits shuffle truncation patterns.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 19 09:03:27 PDT 2023


Author: Simon Pilgrim
Date: 2023-07-19T17:02:11+01:00
New Revision: db50b77ed470d98e68fe8dc03d23b044545dbc82

URL: https://github.com/llvm/llvm-project/commit/db50b77ed470d98e68fe8dc03d23b044545dbc82
DIFF: https://github.com/llvm/llvm-project/commit/db50b77ed470d98e68fe8dc03d23b044545dbc82.diff

LOG: [X86] matchBinaryShuffle - match PACKSS for v2i64 -> v4i32 all-signbits shuffle truncation patterns.

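Ideally matchShuffleWithPACK should be able to handle this, but it needs a major rewrite to handle illegal types. Until then, matchBinaryShuffle matches the {0, 2, 4, 6} v4i32 mask directly when both 64-bit-element sources are all-signbits, and emits PACKSS with a v4i32 source type and a v8i16 result type.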

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
    llvm/test/CodeGen/X86/vector-pcmp.ll
    llvm/test/CodeGen/X86/vsplit-and.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index abe9f29bebd400..0ba904116f15e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39935,6 +39935,18 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
       return true;
     }
   }
+  // TODO: Can we handle this inside matchShuffleWithPACK?
+  if (MaskVT == MVT::v4i32 && Subtarget.hasSSE2() &&
+      isTargetShuffleEquivalent(MaskVT, Mask, {0, 2, 4, 6}, DAG)) {
+    if (V1.getScalarValueSizeInBits() == 64 &&
+        V2.getScalarValueSizeInBits() == 64 &&
+        DAG.ComputeNumSignBits(V1) == 64 && DAG.ComputeNumSignBits(V2) == 64) {
+      SrcVT = MVT::v4i32;
+      DstVT = MVT::v8i16;
+      Shuffle = X86ISD::PACKSS;
+      return true;
+    }
+  }
 
   // Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
   if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||

diff  --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
index 92d4830452a8d1..e5311beb580796 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -521,15 +521,15 @@ define <4 x i1> @eq_or_to_abs_vec4x64(<4 x i64> %x) {
 ; SSE41-LABEL: eq_or_to_abs_vec4x64:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT:    movdqa %xmm1, %xmm3
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
-; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
-; SSE41-NEXT:    pcmpeqq %xmm3, %xmm0
-; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE41-NEXT:    orps %xmm2, %xmm0
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT:    por %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT:    por %xmm2, %xmm1
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE2-LABEL: eq_or_to_abs_vec4x64:
@@ -577,15 +577,15 @@ define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
 ; SSE41-LABEL: eq_or_to_abs_vec4x64_sext:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT:    movdqa %xmm1, %xmm3
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
-; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
-; SSE41-NEXT:    pcmpeqq %xmm3, %xmm0
-; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE41-NEXT:    orps %xmm2, %xmm0
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT:    por %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT:    por %xmm2, %xmm1
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
 ; SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
 ; SSE41-NEXT:    pslld $31, %xmm1
@@ -652,17 +652,17 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
 ; SSE41-LABEL: ne_and_to_abs_vec4x64:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT:    movdqa %xmm1, %xmm3
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
-; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
-; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
-; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE41-NEXT:    orps %xmm2, %xmm0
-; SSE41-NEXT:    xorps %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm4, %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm0
+; SSE41-NEXT:    por %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
+; SSE41-NEXT:    por %xmm2, %xmm1
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm4, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x64:
@@ -715,17 +715,17 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
 ; SSE41-LABEL: ne_and_to_abs_vec4x64_sext:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT:    movdqa %xmm1, %xmm3
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
-; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
-; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
-; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE41-NEXT:    orps %xmm2, %xmm0
-; SSE41-NEXT:    xorps %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm4, %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm0
+; SSE41-NEXT:    por %xmm3, %xmm0
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
+; SSE41-NEXT:    por %xmm2, %xmm1
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm4, %xmm0
 ; SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
 ; SSE41-NEXT:    pslld $31, %xmm1

diff  --git a/llvm/test/CodeGen/X86/vector-pcmp.ll b/llvm/test/CodeGen/X86/vector-pcmp.ll
index a21b959c229d6c..5b43acbe523757 100644
--- a/llvm/test/CodeGen/X86/vector-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vector-pcmp.ll
@@ -1601,8 +1601,8 @@ define <4 x i1> @is_positive_mask_v4i64_v4i1(<4 x i64> %x, <4 x i1> %y) {
 ; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
 ; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
 ; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
-; SSE42-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE42-NEXT:    andps %xmm2, %xmm0
+; SSE42-NEXT:    packssdw %xmm1, %xmm0
+; SSE42-NEXT:    pand %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: is_positive_mask_v4i64_v4i1:

diff  --git a/llvm/test/CodeGen/X86/vsplit-and.ll b/llvm/test/CodeGen/X86/vsplit-and.ll
index 85def820ecb92a..833db0efbda89c 100644
--- a/llvm/test/CodeGen/X86/vsplit-and.ll
+++ b/llvm/test/CodeGen/X86/vsplit-and.ll
@@ -32,15 +32,15 @@ define void @t2(ptr %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
 ; CHECK-NEXT:    movq %rcx, %xmm0
 ; CHECK-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
 ; CHECK-NEXT:    pxor %xmm4, %xmm4
-; CHECK-NEXT:    pcmpeqq %xmm4, %xmm0
 ; CHECK-NEXT:    pcmpeqq %xmm4, %xmm2
-; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pcmpeqq %xmm4, %xmm3
+; CHECK-NEXT:    pcmpeqq %xmm4, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm5, %xmm5
 ; CHECK-NEXT:    pcmpeqq %xmm4, %xmm1
-; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
-; CHECK-NEXT:    orps %xmm2, %xmm1
-; CHECK-NEXT:    xorps %xmm0, %xmm1
+; CHECK-NEXT:    por %xmm2, %xmm1
+; CHECK-NEXT:    pcmpeqq %xmm4, %xmm3
+; CHECK-NEXT:    por %xmm0, %xmm3
+; CHECK-NEXT:    packssdw %xmm3, %xmm1
+; CHECK-NEXT:    pxor %xmm5, %xmm1
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,2,2]
 ; CHECK-NEXT:    pslld $31, %xmm0
 ; CHECK-NEXT:    psrad $31, %xmm0


        


More information about the llvm-commits mailing list