[llvm] 0e89ff8 - [X86] SimplifyDemandedBits - only narrow a broadcast source if we only have one use.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 19 14:54:15 PDT 2021
Author: Simon Pilgrim
Date: 2021-09-19T22:53:30+01:00
New Revision: 0e89ff8195e994e5051f19669e1044d47120ac06
URL: https://github.com/llvm/llvm-project/commit/0e89ff8195e994e5051f19669e1044d47120ac06
DIFF: https://github.com/llvm/llvm-project/commit/0e89ff8195e994e5051f19669e1044d47120ac06.diff
LOG: [X86] SimplifyDemandedBits - only narrow a broadcast source if we only have one use.
Helps with the regression noted on D109065 - don't truncate a broadcast's scalar source if that source has other uses, as the wide value would still need to be materialized for those users, defeating the point of the narrowing.
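As a loose illustration (hypothetical IR, not taken from the commit - the actual fold fires on X86ISD::VBROADCAST nodes during SimplifyDemandedBits): here only the low 32 bits of each broadcast lane are demanded, which would normally allow narrowing the 64-bit source, but the scalar also has a second user, so truncating it would leave both the wide and the narrow value live:

  define <4 x i64> @bcast_multiuse(i64* %p, i64* %q) {
    %s = load i64, i64* %p
    store i64 %s, i64* %q    ; second use of the broadcast source
    %ins = insertelement <4 x i64> undef, i64 %s, i32 0
    %b = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
    ; only the low 32 bits of each lane are demanded
    %lo = and <4 x i64> %b, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
    ret <4 x i64> %lo
  }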
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/oddshuffles.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3187c41a4ab1..3520bbfe1983 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40381,7 +40381,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// Don't attempt this on AVX512 as it might affect broadcast folding.
// TODO: Should we attempt this for i32/i16 splats? They tend to be slower.
if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() &&
- OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2)) {
+ OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2) &&
+ Src->hasOneUse()) {
MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2);
SDValue NewSrc =
TLO.DAG.getNode(ISD::TRUNCATE, SDLoc(Src), NewSrcVT, Src);
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 0a3ddb81d345..b7f7321a8d3d 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -2261,13 +2261,10 @@ define <16 x i32> @splat_v3i32(<3 x i32>* %ptr) {
;
; AVX1-LABEL: splat_v3i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: movq (%rdi), %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3,4,5,6,7]
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm1[1],ymm2[2,3,4,5,6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: splat_v3i32:
@@ -2289,13 +2286,10 @@ define <16 x i32> @splat_v3i32(<3 x i32>* %ptr) {
;
; XOP-LABEL: splat_v3i32:
; XOP: # %bb.0:
-; XOP-NEXT: movq (%rdi), %rax
-; XOP-NEXT: vmovq %rax, %xmm0
-; XOP-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3,4,5,6,7]
-; XOP-NEXT: vmovd %eax, %xmm2
-; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7]
+; XOP-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; XOP-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm1[1],ymm2[2,3,4,5,6,7]
+; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7]
; XOP-NEXT: retq
%1 = load <3 x i32>, <3 x i32>* %ptr, align 1
%2 = shufflevector <3 x i32> %1, <3 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>