[llvm] 6e574a4 - [X86] lowerVECTOR_SHUFFLE - canonicalize zeros/ones/fp splat constants to ensure no undefs (#141214)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 23 03:02:49 PDT 2025
Author: Simon Pilgrim
Date: 2025-05-23T11:02:46+01:00
New Revision: 6e574a4fa332cd458dc75dc29027026b2d416b3a
URL: https://github.com/llvm/llvm-project/commit/6e574a4fa332cd458dc75dc29027026b2d416b3a
DIFF: https://github.com/llvm/llvm-project/commit/6e574a4fa332cd458dc75dc29027026b2d416b3a.diff
LOG: [X86] lowerVECTOR_SHUFFLE - canonicalize zeros/ones/fp splat constants to ensure no undefs (#141214)
Make it easier for splat/element-equivalent detection by ensuring
constant splats contain no undefs.
Integer constants are limited to rematerializable zeros/ones values to
avoid unnecessary scalar_to_vector(int) -> load conversions - we can
relax this later if useful.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr34592.ll
llvm/test/CodeGen/X86/pr38639.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2ce4fa51692b3..92e980574a187 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18322,6 +18322,25 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
"canonicalizeShuffleMaskWithHorizOp "
"shouldn't alter the shuffle mask size");
+ // Canonicalize zeros/ones/fp splat constants to ensure no undefs.
+ // These will be materialized uniformly anyway, so make splat matching easier.
+ // TODO: Allow all int constants?
+ auto CanonicalizeConstant = [VT, &DL, &DAG](SDValue V) {
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ BitVector Undefs;
+ if (SDValue Splat = BV->getSplatValue(&Undefs)) {
+ if (Undefs.any() &&
+ (isNullConstant(Splat) || isAllOnesConstant(Splat) ||
+ isa<ConstantFPSDNode>(Splat))) {
+ V = DAG.getBitcast(VT, DAG.getSplat(BV->getValueType(0), DL, Splat));
+ }
+ }
+ }
+ return V;
+ };
+ V1 = CanonicalizeConstant(V1);
+ V2 = CanonicalizeConstant(V2);
+
// Commute the shuffle if it will improve canonicalization.
if (canonicalizeShuffleMaskWithCommute(Mask)) {
ShuffleVectorSDNode::commuteMask(Mask);
diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll
index aed5ea3ed217b..7cbdb39ddf860 100644
--- a/llvm/test/CodeGen/X86/pr34592.ll
+++ b/llvm/test/CodeGen/X86/pr34592.ll
@@ -24,12 +24,12 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-O0-NEXT: vmovaps 48(%rbp), %ymm11
; CHECK-O0-NEXT: vmovaps 16(%rbp), %ymm11
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3,4,5],ymm0[6,7]
+; CHECK-O0-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-O0-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[2],ymm1[2]
; CHECK-O0-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; CHECK-O0-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,1]
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5],ymm0[6,7]
; CHECK-O0-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm7[2,3],ymm6[0,1]
-; CHECK-O0-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5,6,7]
; CHECK-O0-NEXT: vmovaps %xmm1, %xmm3
; CHECK-O0-NEXT: vmovaps %xmm7, %xmm1
@@ -55,12 +55,12 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-O3-NEXT: vmovdqa 208(%rbp), %ymm3
; CHECK-O3-NEXT: vmovdqa 144(%rbp), %ymm0
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm1 = ymm6[0,1,2,3,4,5],ymm2[6,7]
+; CHECK-O3-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-O3-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,1]
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; CHECK-O3-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm7[2,3],ymm6[0,1]
-; CHECK-O3-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1],ymm2[2,3],ymm1[4,5,6,7]
; CHECK-O3-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm7[0],ymm5[0],ymm7[2],ymm5[2]
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,2,3]
diff --git a/llvm/test/CodeGen/X86/pr38639.ll b/llvm/test/CodeGen/X86/pr38639.ll
index 15cc7581454aa..8eb3da1190285 100644
--- a/llvm/test/CodeGen/X86/pr38639.ll
+++ b/llvm/test/CodeGen/X86/pr38639.ll
@@ -6,11 +6,8 @@ define <8 x double> @test(<4 x double> %a, <4 x double> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [8.2071743224100002E-1,8.2071743224100002E-1,8.2071743224100002E-1,8.2071743224100002E-1]
; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm2[1],ymm1[3],ymm2[3]
-; CHECK-NEXT: vmovddup {{.*#+}} xmm2 = [8.2071743224100002E-1,8.2071743224100002E-1]
-; CHECK-NEXT: # xmm2 = mem[0,0]
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; CHECK-NEXT: retq
%1 = shufflevector <4 x double> %a, <4 x double> <double undef, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C>, <8 x i32> <i32 6, i32 5, i32 2, i32 3, i32 5, i32 1, i32 3, i32 7>
ret <8 x double> %1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 62f59e918f00c..0eb72c8bc0be4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2402,7 +2402,7 @@ define <4 x float> @shuffle_mem_pmovzx_v4f32(ptr %p0, ptr %p1) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpckhps {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; AVX1-NEXT: vmovaps %xmm1, (%rsi)
; AVX1-NEXT: retq
@@ -2411,7 +2411,7 @@ define <4 x float> @shuffle_mem_pmovzx_v4f32(ptr %p0, ptr %p1) {
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vunpckhps {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0
; AVX2OR512VL-NEXT: vmovaps %xmm1, (%rsi)
; AVX2OR512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 545a9d3e314a2..07498c1233b5d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -640,8 +640,7 @@ define <32 x float> @PR47534(<8 x float> %tmp) {
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [7,25,26,27,7,29,30,31,7,25,26,27,7,29,30,31]
-; CHECK-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT: vpmovsxbd {{.*#+}} zmm1 = [7,17,18,19,7,21,22,23,0,25,26,27,0,29,30,31]
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
; CHECK-NEXT: ret{{[l|q]}}
%tmp1 = shufflevector <8 x float> %tmp, <8 x float> undef, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
More information about the llvm-commits
mailing list