[llvm] [X86] widenSubVector - widen from smaller build vector if the upper elements are already the same padding elements (PR #122445)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 10 04:27:47 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/122445.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+13-2)
- (modified) llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll (+1-2)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6b0eb38e7e0952..fbfcfc700ed62d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4144,9 +4144,20 @@ static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl) {
- assert(Vec.getValueSizeInBits().getFixedValue() <= VT.getFixedSizeInBits() &&
- Vec.getValueType().getScalarType() == VT.getScalarType() &&
+ EVT VecVT = Vec.getValueType();
+ assert(VecVT.getFixedSizeInBits() <= VT.getFixedSizeInBits() &&
+ VecVT.getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type");
+ // If the upper 128-bits of a build vector are already undef/zero, then try to
+ // widen from the lower 128-bits.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR && VecVT.is256BitVector()) {
+ unsigned NumSrcElts = VecVT.getVectorNumElements();
+ ArrayRef<SDUse> Hi = Vec->ops().drop_front(NumSrcElts / 2);
+ if (all_of(Hi, [&](SDValue V) {
+ return V.isUndef() || (ZeroNewElements && X86::isZeroNode(V));
+ }))
+ Vec = extract128BitVector(Vec, 0, DAG, dl);
+ }
SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
: DAG.getUNDEF(VT);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
index 445468d06fb042..e7557134b14864 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
@@ -442,7 +442,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512BW-LABEL: PR34175:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40]
+; AVX512BW-NEXT: vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0]
; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1
; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm2
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
@@ -461,7 +461,7 @@ define <4 x double> @PR34175(ptr %p) {
;
; AVX512VBMI-LABEL: PR34175:
; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40]
+; AVX512VBMI-NEXT: vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0]
; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1
; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %ymm2
; AVX512VBMI-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 81ce14132c8799..05071064fc60e1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -598,8 +598,7 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,3,10,1]
; X64-AVX512-NEXT: vpermi2pd %zmm0, %zmm4, %zmm3
; X64-AVX512-NEXT: vmovapd %ymm3, (%rsi)
-; X64-AVX512-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [3,11,3,11]
-; X64-AVX512-NEXT: # ymm3 = mem[0,1,0,1]
+; X64-AVX512-NEXT: vmovapd {{.*#+}} xmm3 = [3,11]
; X64-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,8,9,3]
; X64-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0
``````````
</details>
https://github.com/llvm/llvm-project/pull/122445
More information about the llvm-commits mailing list