[llvm] ffb2887 - [DAG] Fold shuffle(bop(shuffle(x,y),shuffle(z,w)),undef) -> bop(shuffle'(x,y),shuffle'(z,w))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 19 07:15:06 PDT 2021
Author: Simon Pilgrim
Date: 2021-03-19T14:14:56Z
New Revision: ffb28871037105c899f63726953b6c4e7aa7b148
URL: https://github.com/llvm/llvm-project/commit/ffb28871037105c899f63726953b6c4e7aa7b148
DIFF: https://github.com/llvm/llvm-project/commit/ffb28871037105c899f63726953b6c4e7aa7b148.diff
LOG: [DAG] Fold shuffle(bop(shuffle(x,y),shuffle(z,w)),undef) -> bop(shuffle'(x,y),shuffle'(z,w))
Follow-up to D96345: handle unary shuffles of binops (as well as binary shuffles) if we can merge the shuffle with the inner operand shuffles.
Differential Revision: https://reviews.llvm.org/D98646
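
For illustration, here is a minimal LLVM IR sketch (hypothetical function and value names, not taken from the patch) of the unary-shuffle-of-binop pattern this fold targets: the outer shuffle's second operand is undef, and both operands of the inner add are themselves shuffles that the outer mask can potentially be merged into.

; Hypothetical pattern: shuffle(bop(shuffle(x,y),shuffle(z,w)),undef).
define <4 x i32> @unary_shuffle_of_binop(<4 x i32> %x, <4 x i32> %y,
                                         <4 x i32> %z, <4 x i32> %w) {
  %s0 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %s1 = shufflevector <4 x i32> %z, <4 x i32> %w, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bop = add <4 x i32> %s0, %s1
  ; Unary shuffle of the binop result (second shuffle operand is undef).
  %r = shufflevector <4 x i32> %bop, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ret <4 x i32> %r
}

Whether the combine actually fires for a given case still depends on the existing legality and profitability checks in visitVECTOR_SHUFFLE.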
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/haddsub-4.ll
llvm/test/CodeGen/X86/haddsub-shuf.ll
llvm/test/CodeGen/X86/known-signbits-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 382fc91285a0..16833c5977d7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21255,14 +21255,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Merge shuffles through binops if we are able to merge it with at least
// one other shuffles.
+ // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
// shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
unsigned SrcOpcode = N0.getOpcode();
- if (SrcOpcode == N1.getOpcode() && TLI.isBinOp(SrcOpcode) &&
- N->isOnlyUserOf(N0.getNode()) && N->isOnlyUserOf(N1.getNode())) {
+ if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
+ (N1.isUndef() ||
+ (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
+ // Get binop source ops, or just pass on the undef.
SDValue Op00 = N0.getOperand(0);
- SDValue Op10 = N1.getOperand(0);
SDValue Op01 = N0.getOperand(1);
- SDValue Op11 = N1.getOperand(1);
+ SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
+ SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
// TODO: We might be able to relax the VT check but we don't currently
// have any isBinOp() that has different result/ops VTs so play safe until
// we have test coverage.
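
Conceptually, the fold composes the outer shuffle mask into each inner shuffle and drops the outer shuffle. For the hypothetical sketch above (outer mask <2,3,0,1> over inner masks <0,4,1,5>), the merged form would look roughly like the IR below; the names are again hypothetical, and the actual DAG produced depends on the target's shuffle handling.

; Rough shape of the folded form of the earlier sketch: the outer mask
; <2,3,0,1> is composed into each inner mask (<0,4,1,5> becomes <1,5,0,4>),
; and the add is now the final operation with no outer shuffle left.
define <4 x i32> @unary_shuffle_of_binop_folded(<4 x i32> %x, <4 x i32> %y,
                                                <4 x i32> %z, <4 x i32> %w) {
  %s0 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
  %s1 = shufflevector <4 x i32> %z, <4 x i32> %w, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
  %r = add <4 x i32> %s0, %s1
  ret <4 x i32> %r
}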
diff --git a/llvm/test/CodeGen/X86/haddsub-4.ll b/llvm/test/CodeGen/X86/haddsub-4.ll
index 2e077d6247ba..3784400e3086 100644
--- a/llvm/test/CodeGen/X86/haddsub-4.ll
+++ b/llvm/test/CodeGen/X86/haddsub-4.ll
@@ -123,26 +123,25 @@ define <8 x float> @hadd_reverse2_v8f32(<8 x float> %a0, <8 x float> %a1) {
define <8 x float> @hadd_reverse3_v8f32(<8 x float> %a0, <8 x float> %a1) {
; SSE-LABEL: hadd_reverse3_v8f32:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm4
-; SSE-NEXT: haddps %xmm2, %xmm4
-; SSE-NEXT: haddps %xmm3, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,2,1,0]
-; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,2,1,0]
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm4, %xmm1
+; SSE-NEXT: haddps %xmm1, %xmm3
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0,3,2]
+; SSE-NEXT: haddps %xmm0, %xmm2
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0,3,2]
+; SSE-NEXT: movaps %xmm3, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: hadd_reverse3_v8f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vhaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vhaddps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: retq
;
; AVX2-LABEL: hadd_reverse3_v8f32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT: vhaddps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: retq
%shuf0 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
diff --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll b/llvm/test/CodeGen/X86/haddsub-shuf.ll
index 22007df8320a..429175a10818 100644
--- a/llvm/test/CodeGen/X86/haddsub-shuf.ll
+++ b/llvm/test/CodeGen/X86/haddsub-shuf.ll
@@ -525,7 +525,6 @@ define <8 x i32> @hadd_v8i32b(<8 x i32> %a) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vphaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: hadd_v8i32b:
@@ -615,7 +614,6 @@ define <8 x i32> @hsub_v8i32b(<8 x i32> %a) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vphsubd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: hsub_v8i32b:
@@ -705,7 +703,6 @@ define <16 x i16> @hadd_v16i16b(<16 x i16> %a) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vphaddw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: hadd_v16i16b:
@@ -795,7 +792,6 @@ define <16 x i16> @hsub_v16i16b(<16 x i16> %a) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vphsubw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: hsub_v16i16b:
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index 18cd42c8c1de..bed0abf5a26b 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -513,9 +513,8 @@ define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
; X86: # %bb.0:
; X86-NEXT: vpsrad $25, %xmm0, %xmm0
; X86-NEXT: vpsrad $25, %xmm1, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-NEXT: retl
;
@@ -523,9 +522,8 @@ define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
@@ -553,9 +551,8 @@ define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
; X86: # %bb.0:
; X86-NEXT: vpsrad $25, %xmm0, %xmm0
; X86-NEXT: vpsrad $25, %xmm1, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-NEXT: retl
;
@@ -563,9 +560,8 @@ define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
@@ -593,9 +589,8 @@ define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
; X86: # %bb.0:
; X86-NEXT: vpsrad $25, %xmm0, %xmm0
; X86-NEXT: vpsrad $25, %xmm1, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-NEXT: retl
;
@@ -603,9 +598,8 @@ define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
@@ -633,9 +627,8 @@ define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
; X86: # %bb.0:
; X86-NEXT: vpsrad $25, %xmm0, %xmm0
; X86-NEXT: vpsrad $25, %xmm1, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-NEXT: retl
;
@@ -643,9 +636,8 @@ define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;