[llvm] cf5c63d - [DAG] visitVECTOR_SHUFFLE - fold splat(insert_vector_elt()) and splat(scalar_to_vector()) to build_vector splats
Simon Pilgrim via llvm-commits
llvm-commits@lists.llvm.org
Sat Jun 11 13:18:13 PDT 2022
Author: Simon Pilgrim
Date: 2022-06-11T21:06:42+01:00
New Revision: cf5c63d187f4e8e2b1855c2c37fbc79d47852ec8
URL: https://github.com/llvm/llvm-project/commit/cf5c63d187f4e8e2b1855c2c37fbc79d47852ec8
DIFF: https://github.com/llvm/llvm-project/commit/cf5c63d187f4e8e2b1855c2c37fbc79d47852ec8.diff
LOG: [DAG] visitVECTOR_SHUFFLE - fold splat(insert_vector_elt()) and splat(scalar_to_vector()) to build_vector splats
Addresses a number of regressions identified in D127115
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
llvm/test/CodeGen/PowerPC/load-and-splat.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
llvm/test/CodeGen/X86/pr15296.ll
llvm/test/CodeGen/X86/pr51615.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ca5a804c5f996..f87e129d3b22d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22313,6 +22313,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
}
+ // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
+ // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
+ N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
+ return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
+ if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
+ if (Idx->getAPIntValue() == SplatIndex)
+ return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+ }
+
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look though conversions that change things like v4f32 to v2f64.
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 633befec208de..911aa7d992327 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -1002,13 +1002,10 @@ define dso_local <2 x i64> @testSplat8(<8 x i8>* nocapture readonly %ptr) local_
; CHECK-NOVSX-LABEL: testSplat8:
; CHECK-NOVSX: # %bb.0: # %entry
; CHECK-NOVSX-NEXT: ld r3, 0(r3)
-; CHECK-NOVSX-NEXT: addis r4, r2, .LCPI19_0@toc@ha
-; CHECK-NOVSX-NEXT: addi r4, r4, .LCPI19_0@toc@l
-; CHECK-NOVSX-NEXT: lvx v2, 0, r4
+; CHECK-NOVSX-NEXT: addi r4, r1, -16
+; CHECK-NOVSX-NEXT: std r3, -8(r1)
; CHECK-NOVSX-NEXT: std r3, -16(r1)
-; CHECK-NOVSX-NEXT: addi r3, r1, -16
-; CHECK-NOVSX-NEXT: lvx v3, 0, r3
-; CHECK-NOVSX-NEXT: vperm v2, v3, v3, v2
+; CHECK-NOVSX-NEXT: lvx v2, 0, r4
; CHECK-NOVSX-NEXT: blr
;
; CHECK-P7-LABEL: testSplat8:
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index 5eb1810ac55d2..cf8796ad1d709 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -546,11 +546,8 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
;
; P7-LABEL: unadjusted_lxvwsx:
; P7: # %bb.0: # %entry
-; P7-NEXT: lwz r3, 0(r3)
-; P7-NEXT: addi r4, r1, -16
-; P7-NEXT: stw r3, -16(r1)
-; P7-NEXT: lxvw4x vs0, 0, r4
-; P7-NEXT: xxspltw v2, vs0, 0
+; P7-NEXT: lfiwzx f0, 0, r3
+; P7-NEXT: xxspltw v2, vs0, 1
; P7-NEXT: blr
;
; P9-AIX32-LABEL: unadjusted_lxvwsx:
@@ -566,11 +563,8 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
;
; P7-AIX32-LABEL: unadjusted_lxvwsx:
; P7-AIX32: # %bb.0: # %entry
-; P7-AIX32-NEXT: lwz r3, 0(r3)
-; P7-AIX32-NEXT: addi r4, r1, -16
-; P7-AIX32-NEXT: stw r3, -16(r1)
-; P7-AIX32-NEXT: lxvw4x vs0, 0, r4
-; P7-AIX32-NEXT: xxspltw v2, vs0, 0
+; P7-AIX32-NEXT: lfiwzx f0, 0, r3
+; P7-AIX32-NEXT: xxspltw v2, vs0, 1
; P7-AIX32-NEXT: blr
entry:
%0 = bitcast i32* %s to <4 x i8>*
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 6558afc4eda28..f00f0377ecf9b 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -36,7 +36,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
-; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -207,7 +207,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -418,8 +418,7 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index d24d26f830733..5c7a44c948d50 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -39,7 +39,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
-; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -263,7 +263,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -541,8 +541,7 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
index c64953293fa40..b803ea61db30f 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -31,7 +31,7 @@ define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {
; AVX2-LABEL: bitcast_i2_2i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
-; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/pr15296.ll b/llvm/test/CodeGen/X86/pr15296.ll
index 813c67591ae8d..726887e0a6b6d 100644
--- a/llvm/test/CodeGen/X86/pr15296.ll
+++ b/llvm/test/CodeGen/X86/pr15296.ll
@@ -36,12 +36,11 @@ allocas:
define <8 x i32> @shiftInput___canonical(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind {
; X86-LABEL: shiftInput___canonical:
; X86: # %bb.0: # %allocas
-; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; X86-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
-; X86-NEXT: vpsrld %xmm1, %xmm2, %xmm2
-; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-NEXT: vpsrld %xmm2, %xmm1, %xmm1
+; X86-NEXT: vpsrld %xmm2, %xmm0, %xmm0
+; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___canonical:
diff --git a/llvm/test/CodeGen/X86/pr51615.ll b/llvm/test/CodeGen/X86/pr51615.ll
index feccec0a249ee..1010d20316375 100644
--- a/llvm/test/CodeGen/X86/pr51615.ll
+++ b/llvm/test/CodeGen/X86/pr51615.ll
@@ -81,49 +81,27 @@ define void @volatile_load_2_elts_bitcast() {
}
define void @elts_from_consecutive_loads(<2 x i64>* %arg, i32* %arg12, <8 x i32>* %arg13, float %arg14, i1 %arg15) {
-; AVX-LABEL: elts_from_consecutive_loads:
-; AVX: # %bb.0: # %bb
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: .p2align 4, 0x90
-; AVX-NEXT: .LBB3_1: # %bb16
-; AVX-NEXT: # =>This Loop Header: Depth=1
-; AVX-NEXT: # Child Loop BB3_2 Depth 2
-; AVX-NEXT: testb $1, %cl
-; AVX-NEXT: je .LBB3_1
-; AVX-NEXT: .p2align 4, 0x90
-; AVX-NEXT: .LBB3_2: # %bb17
-; AVX-NEXT: # Parent Loop BB3_1 Depth=1
-; AVX-NEXT: # => This Inner Loop Header: Depth=2
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vbroadcastss (%rdi), %ymm2
-; AVX-NEXT: movl %eax, (%rsi)
-; AVX-NEXT: vmovaps %ymm2, (%rdx)
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: jne .LBB3_2
-; AVX-NEXT: jp .LBB3_2
-; AVX-NEXT: jmp .LBB3_1
-;
-; AVX2-LABEL: elts_from_consecutive_loads:
-; AVX2: # %bb.0: # %bb
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: .p2align 4, 0x90
-; AVX2-NEXT: .LBB3_1: # %bb16
-; AVX2-NEXT: # =>This Loop Header: Depth=1
-; AVX2-NEXT: # Child Loop BB3_2 Depth 2
-; AVX2-NEXT: testb $1, %cl
-; AVX2-NEXT: je .LBB3_1
-; AVX2-NEXT: .p2align 4, 0x90
-; AVX2-NEXT: .LBB3_2: # %bb17
-; AVX2-NEXT: # Parent Loop BB3_1 Depth=1
-; AVX2-NEXT: # => This Inner Loop Header: Depth=2
-; AVX2-NEXT: vmovaps (%rdi), %xmm2
-; AVX2-NEXT: vmovss %xmm2, (%rsi)
-; AVX2-NEXT: vbroadcastss %xmm2, %ymm2
-; AVX2-NEXT: vmovaps %ymm2, (%rdx)
-; AVX2-NEXT: vucomiss %xmm1, %xmm0
-; AVX2-NEXT: jne .LBB3_2
-; AVX2-NEXT: jp .LBB3_2
-; AVX2-NEXT: jmp .LBB3_1
+; ALL-LABEL: elts_from_consecutive_loads:
+; ALL: # %bb.0: # %bb
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: .p2align 4, 0x90
+; ALL-NEXT: .LBB3_1: # %bb16
+; ALL-NEXT: # =>This Loop Header: Depth=1
+; ALL-NEXT: # Child Loop BB3_2 Depth 2
+; ALL-NEXT: testb $1, %cl
+; ALL-NEXT: je .LBB3_1
+; ALL-NEXT: .p2align 4, 0x90
+; ALL-NEXT: .LBB3_2: # %bb17
+; ALL-NEXT: # Parent Loop BB3_1 Depth=1
+; ALL-NEXT: # => This Inner Loop Header: Depth=2
+; ALL-NEXT: movl (%rdi), %eax
+; ALL-NEXT: vbroadcastss (%rdi), %ymm2
+; ALL-NEXT: movl %eax, (%rsi)
+; ALL-NEXT: vmovaps %ymm2, (%rdx)
+; ALL-NEXT: vucomiss %xmm1, %xmm0
+; ALL-NEXT: jne .LBB3_2
+; ALL-NEXT: jp .LBB3_2
+; ALL-NEXT: jmp .LBB3_1
bb:
br label %bb16
More information about the llvm-commits
mailing list