[llvm] d088454 - [X86] canonicalizeShuffleWithBinOps - add binary shuffle handling

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 9 06:00:50 PST 2021


Author: Simon Pilgrim
Date: 2021-03-09T13:57:03Z
New Revision: d0884541ccaa3f80526c99c3fcebbb6155c9ed4c

URL: https://github.com/llvm/llvm-project/commit/d0884541ccaa3f80526c99c3fcebbb6155c9ed4c
DIFF: https://github.com/llvm/llvm-project/commit/d0884541ccaa3f80526c99c3fcebbb6155c9ed4c.diff

LOG: [X86] canonicalizeShuffleWithBinOps - add binary shuffle handling

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vsplit-and.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d84b8123aef3..f752ba9c5ed7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36820,8 +36820,8 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
   EVT ShuffleVT = N.getValueType();
 
   auto IsMergeableWithShuffle = [](SDValue Op) {
-    // AllZeros/AllOnes constants are freely shuffled and will peek through bitcasts.
-    // Other constant build vectors do not peek through bitcasts.
+    // AllZeros/AllOnes constants are freely shuffled and will peek through
+    // bitcasts. Other constant build vectors do not peek through bitcasts.
     return ISD::isBuildVectorAllOnes(Op.getNode()) ||
            ISD::isBuildVectorAllZeros(Op.getNode()) ||
            ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
@@ -36836,6 +36836,7 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
 
   unsigned Opc = N.getOpcode();
   switch (Opc) {
+  // Unary and Unary+Permute Shuffles.
   case X86ISD::VBROADCAST:
   case X86ISD::MOVDDUP:
   case X86ISD::PSHUFB:
@@ -36868,6 +36869,47 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
     }
     break;
   }
+  // Binary and Binary+Permute Shuffles.
+  case X86ISD::BLENDI:
+  case X86ISD::SHUFP:
+  case X86ISD::UNPCKH:
+  case X86ISD::UNPCKL: {
+    if (N->isOnlyUserOf(N.getOperand(0).getNode()) &&
+        N->isOnlyUserOf(N.getOperand(1).getNode())) {
+      SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+      SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
+      unsigned SrcOpcode = N0.getOpcode();
+      if (TLI.isBinOp(SrcOpcode) && N1.getOpcode() == SrcOpcode &&
+          IsSafeToMoveShuffle(N0, SrcOpcode) &&
+          IsSafeToMoveShuffle(N1, SrcOpcode)) {
+        SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
+        SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0));
+        SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
+        SDValue Op11 = peekThroughOneUseBitcasts(N1.getOperand(1));
+        if ((IsMergeableWithShuffle(Op00) && IsMergeableWithShuffle(Op10)) ||
+            (IsMergeableWithShuffle(Op01) && IsMergeableWithShuffle(Op11))) {
+          SDValue LHS, RHS;
+          Op00 = DAG.getBitcast(ShuffleVT, Op00);
+          Op10 = DAG.getBitcast(ShuffleVT, Op10);
+          Op01 = DAG.getBitcast(ShuffleVT, Op01);
+          Op11 = DAG.getBitcast(ShuffleVT, Op11);
+          if (N.getNumOperands() == 3) {
+            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2));
+            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11, N.getOperand(2));
+          } else {
+            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10);
+            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11);
+          }
+          EVT OpVT = N0.getValueType();
+          return DAG.getBitcast(ShuffleVT,
+                                DAG.getNode(SrcOpcode, DL, OpVT,
+                                            DAG.getBitcast(OpVT, LHS),
+                                            DAG.getBitcast(OpVT, RHS)));
+        }
+      }
+    }
+    break;
+  }
   }
   return SDValue();
 }

diff  --git a/llvm/test/CodeGen/X86/vsplit-and.ll b/llvm/test/CodeGen/X86/vsplit-and.ll
index 26bbcdbe5d91..aa043ed67f69 100644
--- a/llvm/test/CodeGen/X86/vsplit-and.ll
+++ b/llvm/test/CodeGen/X86/vsplit-and.ll
@@ -23,37 +23,34 @@ define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind read
 define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
 ; CHECK-LABEL: t2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %r9, %xmm1
-; CHECK-NEXT:    movq %r8, %xmm0
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT:    movq %rdx, %xmm1
+; CHECK-NEXT:    movq %r9, %xmm0
+; CHECK-NEXT:    movq %r8, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movq %rdx, %xmm0
 ; CHECK-NEXT:    movq %rsi, %xmm2
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; CHECK-NEXT:    movq %rcx, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; CHECK-NEXT:    movq %rcx, %xmm0
 ; CHECK-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
 ; CHECK-NEXT:    pxor %xmm4, %xmm4
-; CHECK-NEXT:    pcmpeqq %xmm4, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm5, %xmm5
-; CHECK-NEXT:    pxor %xmm5, %xmm1
+; CHECK-NEXT:    pcmpeqq %xmm4, %xmm0
 ; CHECK-NEXT:    pcmpeqq %xmm4, %xmm2
-; CHECK-NEXT:    pxor %xmm5, %xmm2
-; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT:    pcmpeqq %xmm4, %xmm3
-; CHECK-NEXT:    pxor %xmm5, %xmm3
-; CHECK-NEXT:    pcmpeqq %xmm4, %xmm0
-; CHECK-NEXT:    pxor %xmm5, %xmm0
-; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
-; CHECK-NEXT:    andps %xmm2, %xmm0
-; CHECK-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
-; CHECK-NEXT:    psllq $63, %xmm1
-; CHECK-NEXT:    psrad $31, %xmm1
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; CHECK-NEXT:    pcmpeqq %xmm4, %xmm1
+; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
+; CHECK-NEXT:    xorps %xmm0, %xmm1
+; CHECK-NEXT:    andnps %xmm1, %xmm2
+; CHECK-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
 ; CHECK-NEXT:    psllq $63, %xmm0
 ; CHECK-NEXT:    psrad $31, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NEXT:    movq %xmm0, 16(%rdi)
-; CHECK-NEXT:    movdqa %xmm1, (%rdi)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
+; CHECK-NEXT:    psllq $63, %xmm1
+; CHECK-NEXT:    psrad $31, %xmm1
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-NEXT:    movq %xmm1, 16(%rdi)
+; CHECK-NEXT:    movdqa %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
   %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer


        


More information about the llvm-commits mailing list