[PATCH] D108411: [X86][TLI] SimplifyDemandedVectorEltsForTargetNode(): don't break apart broadcasts from which not just the 0'th elt is demanded
Roman Lebedev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 23 06:32:07 PDT 2021
lebedev.ri updated this revision to Diff 368088.
lebedev.ri added a comment.
Rebased, NFC.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D108411/new/
https://reviews.llvm.org/D108411
Files:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
llvm/test/CodeGen/X86/horizontal-sum.ll
llvm/test/CodeGen/X86/sse41.ll
Index: llvm/test/CodeGen/X86/sse41.ll
===================================================================
--- llvm/test/CodeGen/X86/sse41.ll
+++ llvm/test/CodeGen/X86/sse41.ll
@@ -645,8 +645,7 @@
; X86-AVX1-LABEL: pinsrd_from_shufflevector_i32:
; X86-AVX1: ## %bb.0: ## %entry
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vpermilps $0, (%eax), %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0x08,0x00]
-; X86-AVX1-NEXT: ## xmm1 = mem[0,0,0,0]
+; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm1 ## encoding: [0xc4,0xe2,0x79,0x18,0x08]
; X86-AVX1-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
; X86-AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
@@ -669,8 +668,7 @@
;
; X64-AVX1-LABEL: pinsrd_from_shufflevector_i32:
; X64-AVX1: ## %bb.0: ## %entry
-; X64-AVX1-NEXT: vpermilps $0, (%rdi), %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0x0f,0x00]
-; X64-AVX1-NEXT: ## xmm1 = mem[0,0,0,0]
+; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm1 ## encoding: [0xc4,0xe2,0x79,0x18,0x0f]
; X64-AVX1-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
; X64-AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
Index: llvm/test/CodeGen/X86/horizontal-sum.ll
===================================================================
--- llvm/test/CodeGen/X86/horizontal-sum.ll
+++ llvm/test/CodeGen/X86/horizontal-sum.ll
@@ -752,9 +752,9 @@
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
; AVX2-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3]
-; AVX2-SLOW-NEXT: vpbroadcastd %xmm3, %xmm4
-; AVX2-SLOW-NEXT: vpbroadcastq %xmm3, %xmm5
-; AVX2-SLOW-NEXT: vpaddd %xmm4, %xmm5, %xmm4
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
+; AVX2-SLOW-NEXT: vpbroadcastd %xmm3, %xmm5
+; AVX2-SLOW-NEXT: vpaddd %xmm5, %xmm4, %xmm4
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,2,2,2]
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3]
Index: llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
===================================================================
--- llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
+++ llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
@@ -284,7 +284,7 @@
define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
+; CHECK-NEXT: vbroadcastsd %xmm1, %ymm1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: retq
%r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39816,6 +39816,12 @@
}
}
+ // For broadcasts, unless we demand *only* the 0'th element, stop attempting
+ // to simplify here: we aren't going to improve things, because a broadcast
+ // is better than any potential shuffle.
+ if (isTargetShuffleSplat(Op) && !DemandedElts.isOneValue())
+ return false;
+
// Get target/faux shuffle mask.
APInt OpUndef, OpZero;
SmallVector<int, 64> OpMask;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D108411.368088.patch
Type: text/x-patch
Size: 3654 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210823/f665c4ee/attachment.bin>
More information about the llvm-commits
mailing list