[llvm-branch-commits] [llvm] be69e66 - [X86][SSE] Attempt to fold shuffle(binop(), binop()) -> binop(shuffle(), shuffle())
Simon Pilgrim via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 15 08:43:04 PST 2021
Author: Simon Pilgrim
Date: 2021-01-15T16:25:25Z
New Revision: be69e66b1cd826f499566e1c3dadbf04e872baa0
URL: https://github.com/llvm/llvm-project/commit/be69e66b1cd826f499566e1c3dadbf04e872baa0
DIFF: https://github.com/llvm/llvm-project/commit/be69e66b1cd826f499566e1c3dadbf04e872baa0.diff
LOG: [X86][SSE] Attempt to fold shuffle(binop(),binop()) -> binop(shuffle(),shuffle())
If this will help us fold shuffles together, then push the shuffle through the merged binops.
Ideally this would be performed in DAGCombiner::visitVECTOR_SHUFFLE but getting an efficient+legal merged shuffle can be tricky - on SSE we can be confident that, for vectors with 32/64-bit elements, the shuffles should easily fold.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/haddsub-shuf.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a84250782c19..d2cc2395576a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37939,6 +37939,33 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG))
return HAddSub;
+
+ // Merge shuffles through binops if it's likely we'll be able to merge them
+ // with other shuffles.
+ // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
+ // TODO: We might be able to move this to DAGCombiner::visitVECTOR_SHUFFLE.
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N)) {
+ unsigned SrcOpcode = N->getOperand(0).getOpcode();
+ if (SrcOpcode == N->getOperand(1).getOpcode() && TLI.isBinOp(SrcOpcode) &&
+ N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->isOnlyUserOf(N->getOperand(1).getNode()) &&
+ VT.getScalarSizeInBits() >= 32) {
+ SDValue Op00 = N->getOperand(0).getOperand(0);
+ SDValue Op10 = N->getOperand(1).getOperand(0);
+ SDValue Op01 = N->getOperand(0).getOperand(1);
+ SDValue Op11 = N->getOperand(1).getOperand(1);
+ if ((Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op10.getOpcode() == ISD::VECTOR_SHUFFLE) &&
+ (Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
+ SDLoc DL(N);
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue LHS = DAG.getVectorShuffle(VT, DL, Op00, Op10, Mask);
+ SDValue RHS = DAG.getVectorShuffle(VT, DL, Op01, Op11, Mask);
+ return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
+ }
+ }
+ }
}
// Attempt to combine into a vector load/broadcast.
diff --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll b/llvm/test/CodeGen/X86/haddsub-shuf.ll
index 9b2dfc1ce0cb..37eedcd54441 100644
--- a/llvm/test/CodeGen/X86/haddsub-shuf.ll
+++ b/llvm/test/CodeGen/X86/haddsub-shuf.ll
@@ -923,45 +923,15 @@ define <4 x float> @PR34724_1(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @PR34724_2(<4 x float> %a, <4 x float> %b) {
-; SSSE3_SLOW-LABEL: PR34724_2:
-; SSSE3_SLOW: # %bb.0:
-; SSSE3_SLOW-NEXT: haddps %xmm1, %xmm0
-; SSSE3_SLOW-NEXT: movsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; SSSE3_SLOW-NEXT: addps %xmm1, %xmm2
-; SSSE3_SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
-; SSSE3_SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
-; SSSE3_SLOW-NEXT: retq
-;
-; SSSE3_FAST-LABEL: PR34724_2:
-; SSSE3_FAST: # %bb.0:
-; SSSE3_FAST-NEXT: haddps %xmm1, %xmm0
-; SSSE3_FAST-NEXT: retq
-;
-; AVX1_SLOW-LABEL: PR34724_2:
-; AVX1_SLOW: # %bb.0:
-; AVX1_SLOW-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX1_SLOW-NEXT: vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; AVX1_SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; AVX1_SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
-; AVX1_SLOW-NEXT: retq
-;
-; AVX1_FAST-LABEL: PR34724_2:
-; AVX1_FAST: # %bb.0:
-; AVX1_FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX1_FAST-NEXT: retq
-;
-; AVX2_SLOW-LABEL: PR34724_2:
-; AVX2_SLOW: # %bb.0:
-; AVX2_SLOW-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX2_SLOW-NEXT: vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; AVX2_SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; AVX2_SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
-; AVX2_SLOW-NEXT: retq
+; SSSE3-LABEL: PR34724_2:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: haddps %xmm1, %xmm0
+; SSSE3-NEXT: retq
;
-; AVX2_FAST-LABEL: PR34724_2:
-; AVX2_FAST: # %bb.0:
-; AVX2_FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX2_FAST-NEXT: retq
+; AVX-LABEL: PR34724_2:
+; AVX: # %bb.0:
+; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 4, i32 undef, i32 undef>
%t1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 5, i32 undef, i32 undef>
%t2 = fadd <4 x float> %t0, %t1
More information about the llvm-branch-commits
mailing list