[llvm] [X86] Merge insertsubvector(load(p0),load_subv(p0),hi) -> subvbroadcast(p0) if either load is oneuse (PR #128857)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 03:09:34 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/128857
This fold is currently limited to cases where the load_subv(p0) has oneuse, but it's beneficial if either load has oneuse and will be replaced.
Yet another yak shave for #122671
>From 1e181624b0a9fa3c7df3d805a906c4da618557d2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 26 Feb 2025 11:08:50 +0000
Subject: [PATCH] [X86] Merge insertsubvector(load(p0),load_subv(p0),hi) ->
subvbroadcast(p0) if either load is oneuse
This fold is currently limited to cases where the load_subv(p0) has oneuse, but it's beneficial if either load has oneuse and will be replaced.
Yet another yak shave for #122671
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +++--
.../X86/vector-interleaved-store-i8-stride-7.ll | 12 ++++--------
2 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 84aaf86550842..2791021ee0f10 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58562,8 +58562,9 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
// If we're splatting the lower half subvector of a full vector load into the
// upper half, attempt to create a subvector broadcast.
- if (IdxVal == (OpVT.getVectorNumElements() / 2) && SubVec.hasOneUse() &&
- Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits())) {
+ if (IdxVal == (OpVT.getVectorNumElements() / 2) &&
+ Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits()) &&
+ (Vec.hasOneUse() || SubVec.hasOneUse())) {
auto *VecLd = dyn_cast<LoadSDNode>(Vec);
auto *SubLd = dyn_cast<LoadSDNode>(SubVec);
if (VecLd && SubLd &&
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
index c1dba071b4353..b6cbddd667d8e 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
@@ -10210,11 +10210,9 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512BW-NEXT: vpshufb %ymm28, %ymm18, %ymm23
; AVX512BW-NEXT: vporq %ymm2, %ymm23, %ymm2
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512BW-NEXT: vmovdqa64 (%rdx), %zmm2
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm14, %zmm2, %zmm2
+; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpshufb %zmm20, %zmm2, %zmm2
-; AVX512BW-NEXT: vmovdqa64 (%rcx), %zmm20
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm15, %zmm20, %zmm20
+; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm20 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpshufb %zmm22, %zmm20, %zmm20
; AVX512BW-NEXT: vporq %zmm2, %zmm20, %zmm2
; AVX512BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,2,3,6,7,6,7]
@@ -10816,11 +10814,9 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-BW-NEXT: vpshufb %ymm28, %ymm18, %ymm23
; AVX512DQ-BW-NEXT: vporq %ymm2, %ymm23, %ymm2
; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512DQ-BW-NEXT: vmovdqa64 (%rdx), %zmm2
-; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm14, %zmm2, %zmm2
+; AVX512DQ-BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = mem[0,1,2,3,0,1,2,3]
; AVX512DQ-BW-NEXT: vpshufb %zmm20, %zmm2, %zmm2
-; AVX512DQ-BW-NEXT: vmovdqa64 (%rcx), %zmm20
-; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm15, %zmm20, %zmm20
+; AVX512DQ-BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm20 = mem[0,1,2,3,0,1,2,3]
; AVX512DQ-BW-NEXT: vpshufb %zmm22, %zmm20, %zmm20
; AVX512DQ-BW-NEXT: vporq %zmm2, %zmm20, %zmm2
; AVX512DQ-BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,2,3,6,7,6,7]
More information about the llvm-commits
mailing list