[llvm] 0f23557 - [X86] Fold broadcast(scalar) -> scalar_to_vector(scalar) if only the lowest element is demanded.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 20 02:20:06 PDT 2024
Author: Simon Pilgrim
Date: 2024-09-20T10:19:46+01:00
New Revision: 0f235573de6386260afcd8fa144d24366927288c
URL: https://github.com/llvm/llvm-project/commit/0f235573de6386260afcd8fa144d24366927288c
DIFF: https://github.com/llvm/llvm-project/commit/0f235573de6386260afcd8fa144d24366927288c.diff
LOG: [X86] Fold broadcast(scalar) -> scalar_to_vector(scalar) if only the lowest element is demanded.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr29222.ll
llvm/test/CodeGen/X86/shuffle-half.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b9c9e5703849ae..4c6a323ffba6ae 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42863,15 +42863,18 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
- if (!SrcVT.isVector())
- break;
// Don't bother broadcasting if we just need the 0'th element.
if (DemandedElts == 1) {
+ if (!SrcVT.isVector())
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op), VT, Src));
if (Src.getValueType() != VT)
Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
SDLoc(Op));
return TLO.CombineTo(Op, Src);
}
+ if (!SrcVT.isVector())
+ break;
APInt SrcUndef, SrcZero;
APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
diff --git a/llvm/test/CodeGen/X86/pr29222.ll b/llvm/test/CodeGen/X86/pr29222.ll
index 6b8ac918386faf..9814361404f2d4 100644
--- a/llvm/test/CodeGen/X86/pr29222.ll
+++ b/llvm/test/CodeGen/X86/pr29222.ll
@@ -54,9 +54,7 @@ define i32 @PR29222(i32) nounwind {
; X64-AVX-NEXT: movd %edi, %mm0
; X64-AVX-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-AVX-NEXT: packsswb %mm0, %mm0
-; X64-AVX-NEXT: movq %mm0, %rax
-; X64-AVX-NEXT: vmovq %rax, %xmm0
-; X64-AVX-NEXT: vpbroadcastq %xmm0, %xmm0
+; X64-AVX-NEXT: movq2dq %mm0, %xmm0
; X64-AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/shuffle-half.ll b/llvm/test/CodeGen/X86/shuffle-half.ll
index 001db2c7cecae1..0aa8c68764f828 100644
--- a/llvm/test/CodeGen/X86/shuffle-half.ll
+++ b/llvm/test/CodeGen/X86/shuffle-half.ll
@@ -225,7 +225,7 @@ define <32 x half> @build_vec(ptr %p, <32 x i1> %mask) {
; CHECK-NEXT: testl $65536, %eax # imm = 0x10000
; CHECK-NEXT: je .LBB1_35
; CHECK-NEXT: .LBB1_34: # %cond.load46
-; CHECK-NEXT: vpbroadcastw 32(%rdi), %xmm1
+; CHECK-NEXT: vpinsrw $0, 32(%rdi), %xmm0, %xmm1
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
More information about the llvm-commits mailing list