[llvm] 9a77af3 - [X86] lowerV4F64Shuffle - prefer BLEND before UNPCK shuffle matching (#141073)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 22 08:51:03 PDT 2025
Author: Simon Pilgrim
Date: 2025-05-22T16:51:00+01:00
New Revision: 9a77af37d8719bcfa97e31a730c1a401b91a8e14
URL: https://github.com/llvm/llvm-project/commit/9a77af37d8719bcfa97e31a730c1a401b91a8e14
DIFF: https://github.com/llvm/llvm-project/commit/9a77af37d8719bcfa97e31a730c1a401b91a8e14.diff
LOG: [X86] lowerV4F64Shuffle - prefer BLEND before UNPCK shuffle matching (#141073)
Use the same matching order as for other 128/256-bit shuffles.
Fixes a regression identified in #139741.
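
For illustration, a hand-written reduction in the spirit of the updated tests (a sketch, not taken from the commit): with a zero first source, a splatted second source, and an undef lane, both an unpack and a blend are correct lowerings of the same mask, so the matching order decides which instruction is emitted.

; Sketch only: for mask <0, undef, 2, 7> over (zero, splat),
; unpcklpd yields <0, s, 0, s> and a lane-3 blend yields <0, 0, 0, s>;
; lane 1 is undef and %splat is uniform (element 2 == element 3),
; so both are valid lowerings of this shuffle.
define <4 x double> @blend_or_unpck(double %x) {
  %ins = insertelement <4 x double> poison, double %x, i64 0
  %splat = shufflevector <4 x double> %ins, <4 x double> poison, <4 x i32> zeroinitializer
  %res = shufflevector <4 x double> zeroinitializer, <4 x double> %splat, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x double> %res
}
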
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
llvm/test/CodeGen/X86/subvector-broadcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7abc854454348..38be3a82af658 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16444,15 +16444,14 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                DAG, Subtarget);
   }
 
-  // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, V1, V2, Mask, DAG))
-    return V;
-
   if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Blend;
 
-  // Check if the blend happens to exactly fit that of SHUFPD.
+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, V1, V2, Mask, DAG))
+    return V;
+
   if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Op;
diff --git a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
index 45df725d7a78c..0cdc5458e71ca 100644
--- a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
+++ b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
@@ -7,7 +7,7 @@ define void @foo(<2 x float> %0) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
 ; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
 ; CHECK-NEXT:    vmovlps %xmm0, 0
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 76183ac5f8fa3..75333bf835f89 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -1662,19 +1662,46 @@ define <4 x double> @broadcast_v4f64_v2f64_4u61(ptr %vp, <4 x double> %default)
   ret <4 x double> %res
 }
 
+; TODO: prefer vblend vs vunpckh on AVX1 targets
 define <8 x float> @broadcast_v8f32_v2f32_u1uu0uEu(ptr %vp, <8 x float> %default) {
-; X86-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vbroadcastsd (%eax), %ymm1
-; X86-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; X86-NEXT:    retl
+; X86-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; X86-AVX1-NEXT:    retl
 ;
-; X64-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
-; X64:       # %bb.0:
-; X64-NEXT:    vbroadcastsd (%rdi), %ymm1
-; X64-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; X64-NEXT:    retq
+; X86-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X86-AVX2-NEXT:    retl
+;
+; X86-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X64-AVX512-NEXT:    retq
   %vec = load <2 x float>, ptr %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 0, i32 2, i32 3, i32 undef>
   %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default
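
For readers decoding the new CHECK lines: in float lanes, "vblendps ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]" keeps lanes 0-5 from the first source and lanes 6-7 from the second. A hand-written IR equivalence (not part of the tests):

; Indices 0-5 select %a; indices 14-15 select lanes 6-7 of %b.
; An AVX target can implement this as a single vblendps with
; immediate 0xc0 (one mask bit per float lane, set = second source).
define <8 x float> @blend_equiv(<8 x float> %a, <8 x float> %b) {
  %r = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
  ret <8 x float> %r
}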