[llvm] d088454 - [X86] canonicalizeShuffleWithBinOps - add binary shuffle handling
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 9 06:00:50 PST 2021
Author: Simon Pilgrim
Date: 2021-03-09T13:57:03Z
New Revision: d0884541ccaa3f80526c99c3fcebbb6155c9ed4c
URL: https://github.com/llvm/llvm-project/commit/d0884541ccaa3f80526c99c3fcebbb6155c9ed4c
DIFF: https://github.com/llvm/llvm-project/commit/d0884541ccaa3f80526c99c3fcebbb6155c9ed4c.diff
LOG: [X86] canonicalizeShuffleWithBinOps - add binary shuffle handling
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vsplit-and.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d84b8123aef3..f752ba9c5ed7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36820,8 +36820,8 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
EVT ShuffleVT = N.getValueType();
auto IsMergeableWithShuffle = [](SDValue Op) {
- // AllZeros/AllOnes constants are freely shuffled and will peek through bitcasts.
- // Other constant build vectors do not peek through bitcasts.
+ // AllZeros/AllOnes constants are freely shuffled and will peek through
+ // bitcasts. Other constant build vectors do not peek through bitcasts.
return ISD::isBuildVectorAllOnes(Op.getNode()) ||
ISD::isBuildVectorAllZeros(Op.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
@@ -36836,6 +36836,7 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
unsigned Opc = N.getOpcode();
switch (Opc) {
+ // Unary and Unary+Permute Shuffles.
case X86ISD::VBROADCAST:
case X86ISD::MOVDDUP:
case X86ISD::PSHUFB:
@@ -36868,6 +36869,47 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
}
break;
}
+ // Binary and Binary+Permute Shuffles.
+ case X86ISD::BLENDI:
+ case X86ISD::SHUFP:
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL: {
+ if (N->isOnlyUserOf(N.getOperand(0).getNode()) &&
+ N->isOnlyUserOf(N.getOperand(1).getNode())) {
+ SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+ SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
+ unsigned SrcOpcode = N0.getOpcode();
+ if (TLI.isBinOp(SrcOpcode) && N1.getOpcode() == SrcOpcode &&
+ IsSafeToMoveShuffle(N0, SrcOpcode) &&
+ IsSafeToMoveShuffle(N1, SrcOpcode)) {
+ SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
+ SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0));
+ SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
+ SDValue Op11 = peekThroughOneUseBitcasts(N1.getOperand(1));
+ if ((IsMergeableWithShuffle(Op00) && IsMergeableWithShuffle(Op10)) ||
+ (IsMergeableWithShuffle(Op01) && IsMergeableWithShuffle(Op11))) {
+ SDValue LHS, RHS;
+ Op00 = DAG.getBitcast(ShuffleVT, Op00);
+ Op10 = DAG.getBitcast(ShuffleVT, Op10);
+ Op01 = DAG.getBitcast(ShuffleVT, Op01);
+ Op11 = DAG.getBitcast(ShuffleVT, Op11);
+ if (N.getNumOperands() == 3) {
+ LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2));
+ RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11, N.getOperand(2));
+ } else {
+ LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10);
+ RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11);
+ }
+ EVT OpVT = N0.getValueType();
+ return DAG.getBitcast(ShuffleVT,
+ DAG.getNode(SrcOpcode, DL, OpVT,
+ DAG.getBitcast(OpVT, LHS),
+ DAG.getBitcast(OpVT, RHS)));
+ }
+ }
+ }
+ break;
+ }
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/vsplit-and.ll b/llvm/test/CodeGen/X86/vsplit-and.ll
index 26bbcdbe5d91..aa043ed67f69 100644
--- a/llvm/test/CodeGen/X86/vsplit-and.ll
+++ b/llvm/test/CodeGen/X86/vsplit-and.ll
@@ -23,37 +23,34 @@ define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind read
define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
; CHECK-LABEL: t2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %r9, %xmm1
-; CHECK-NEXT: movq %r8, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: movq %r9, %xmm0
+; CHECK-NEXT: movq %r8, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movq %rdx, %xmm0
; CHECK-NEXT: movq %rsi, %xmm2
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; CHECK-NEXT: movq %rcx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; CHECK-NEXT: movq %rcx, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
; CHECK-NEXT: pxor %xmm4, %xmm4
-; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm5, %xmm5
-; CHECK-NEXT: pxor %xmm5, %xmm1
+; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
; CHECK-NEXT: pcmpeqq %xmm4, %xmm2
-; CHECK-NEXT: pxor %xmm5, %xmm2
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pcmpeqq %xmm4, %xmm3
-; CHECK-NEXT: pxor %xmm5, %xmm3
-; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
-; CHECK-NEXT: pxor %xmm5, %xmm0
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
-; CHECK-NEXT: andps %xmm2, %xmm0
-; CHECK-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
-; CHECK-NEXT: psllq $63, %xmm1
-; CHECK-NEXT: psrad $31, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
+; CHECK-NEXT: xorps %xmm0, %xmm1
+; CHECK-NEXT: andnps %xmm1, %xmm2
+; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
; CHECK-NEXT: psllq $63, %xmm0
; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NEXT: movq %xmm0, 16(%rdi)
-; CHECK-NEXT: movdqa %xmm1, (%rdi)
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
+; CHECK-NEXT: psllq $63, %xmm1
+; CHECK-NEXT: psrad $31, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-NEXT: movq %xmm1, 16(%rdi)
+; CHECK-NEXT: movdqa %xmm0, (%rdi)
; CHECK-NEXT: retq
%cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
%cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
More information about the llvm-commits mailing list