[llvm] [X86] combineEXTRACT_SUBVECTOR - generalize extract_subvector(broadcast(x),c) fold with IsElementEquivalent (PR #141963)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 08:17:23 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Changes:
Instead of matching the broadcast nodes directly, let IsElementEquivalent do the work; it already has BITCAST handling, so the fold now also catches splats hidden behind bitcasts. A standalone sketch of the idea follows below.
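For intuition, here is a small self-contained C++ sketch (not the LLVM implementation; the helper name canRebaseExtractToZero and the explicit element-origin vector are illustrative assumptions) of the check the patch performs: an extract_subvector at a non-zero index can be rebased to index 0 whenever every extracted lane is provably identical to the corresponding lane of the lowest subvector, which holds both for a plain broadcast and for a splat seen through a bitcast.

```cpp
// Standalone model of the fold (not LLVM code): Origin[i] records which source
// element lane i holds (all zeros for a broadcast). Extracting NumSubElts lanes
// at IdxVal is equivalent to extracting at index 0 if each lane pair matches.
#include <cstdio>
#include <vector>

static bool canRebaseExtractToZero(const std::vector<int> &Origin,
                                   unsigned NumSubElts, unsigned IdxVal) {
  for (unsigned I = 0; I != NumSubElts; ++I)
    if (Origin[I] != Origin[I + IdxVal]) // lanes must be provably identical
      return false;
  return true;
}

int main() {
  // v8i32 broadcast: every lane holds source element 0.
  std::vector<int> Splat(8, 0);
  // v8i32 view of a v4i64 splat (bitcast): lanes repeat the pair {0,1}.
  std::vector<int> PairSplat = {0, 1, 0, 1, 0, 1, 0, 1};

  // Extract the upper v4i32 half (IdxVal = 4); both cases can be rebased.
  std::printf("broadcast:     %d\n", canRebaseExtractToZero(Splat, 4, 4));
  std::printf("bitcast splat: %d\n", canRebaseExtractToZero(PairSplat, 4, 4));
  return 0;
}
```

In the actual patch the per-lane check is IsElementEquivalent, which already understands bitcasts, so cases like the vbroadcasti32x4 patterns in avx512-vbroadcasti128.ll now simplify to a single ymm broadcast, as the test diffs below show.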
---
Full diff: https://github.com/llvm/llvm-project/pull/141963.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+10-9)
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll (+8-24)
- (modified) llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll (+4-8)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6b71f49165c60..be1939eb95fa1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59549,6 +59549,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
unsigned SizeInBits = VT.getSizeInBits();
unsigned InSizeInBits = InVecVT.getSizeInBits();
unsigned NumSubElts = VT.getVectorNumElements();
+ unsigned NumInElts = InVecVT.getVectorNumElements();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(N);
@@ -59615,22 +59616,22 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
}
}
- // If we're extracting an upper subvector from a broadcast we should just
- // extract the lowest subvector instead which should allow
+ // If we're extracting an upper subvector see if we'd get the same elements if
+ // we extracted the lowest subvector instead which should allow
// SimplifyDemandedVectorElts do more simplifications.
- if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST ||
- InVec.getOpcode() == X86ISD::VBROADCAST_LOAD ||
- DAG.isSplatValue(InVec, /*AllowUndefs*/ false)))
- return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
+ if (IdxVal != 0) {
+ bool AllEquiv = all_of(seq<unsigned>(NumSubElts), [&](unsigned I) {
+ return IsElementEquivalent(NumInElts, InVec, InVec, I, I + IdxVal);
+ });
+ if (AllEquiv)
+ return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
+ }
// Check if we're extracting a whole broadcasted subvector.
if (InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
auto *MemIntr = cast<MemIntrinsicSDNode>(InVec);
EVT MemVT = MemIntr->getMemoryVT();
if (MemVT == VT) {
- // Just use the lowest subvector.
- if (IdxVal != 0)
- return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
// If this is the only use, we can replace with a regular load (this may
// have been missed by SimplifyDemandedVectorElts due to extra uses of the
// memory chain).
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index d6e6ad184e03f..f2e4da0ac5400 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -3951,7 +3951,6 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
; AVX512F-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -3962,7 +3961,6 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
; AVX512DQ-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4004,7 +4002,7 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
;
; AVX-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss (%rdi), %ymm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3
@@ -4013,7 +4011,6 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
; AVX-NEXT: vmovdqa %xmm3, 16(%rdx)
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
; AVX-NEXT: vmovdqa %xmm1, 48(%rdx)
-; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX2-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
@@ -4029,7 +4026,6 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
; AVX512F-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4040,7 +4036,6 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
; AVX512DQ-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4082,7 +4077,7 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e
;
; AVX-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3
@@ -4091,7 +4086,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e
; AVX-NEXT: vmovdqa %xmm3, 16(%rdx)
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
; AVX-NEXT: vmovdqa %xmm1, 48(%rdx)
-; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX2-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
@@ -4107,7 +4101,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e
; AVX512F-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4118,7 +4111,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e
; AVX512DQ-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4184,7 +4176,6 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i
; AVX512F-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4195,7 +4186,6 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i
; AVX512DQ-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4338,7 +4328,6 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
; AVX512F-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4349,7 +4338,6 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
; AVX512DQ-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4418,7 +4406,6 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in.
; AVX512F-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4429,7 +4416,6 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in.
; AVX512DQ-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4497,7 +4483,6 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i
; AVX512F-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4508,7 +4493,6 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i
; AVX512DQ-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4654,7 +4638,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
;
; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd (%rdi), %zmm0
+; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4664,7 +4648,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
;
; AVX512DQ-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastd (%rdi), %zmm0
+; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4731,7 +4715,7 @@ define void @vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4(ptr %i
;
; AVX512F-LABEL: vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd (%rdi), %zmm0
+; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4741,7 +4725,7 @@ define void @vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4(ptr %i
;
; AVX512DQ-LABEL: vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastd (%rdi), %zmm0
+; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4886,7 +4870,7 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i
;
; AVX512F-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq (%rdi), %zmm0
+; AVX512F-NEXT: vpbroadcastq (%rdi), %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4896,7 +4880,7 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i
;
; AVX512DQ-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastq (%rdi), %zmm0
+; AVX512DQ-NEXT: vpbroadcastq (%rdi), %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll b/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll
index 40ad731bff85c..fcde7ffe30ff2 100644
--- a/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll
+++ b/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll
@@ -134,9 +134,8 @@ define <16 x i32> @test_broadcast_4i32_16i32(ptr%p) nounwind {
define <32 x i16> @test_broadcast_8i16_32i16(ptr%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_8i16_32i16:
; X64-AVX512VL: ## %bb.0:
-; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512VL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512VL-NEXT: retq
@@ -149,9 +148,8 @@ define <32 x i16> @test_broadcast_8i16_32i16(ptr%p) nounwind {
;
; X64-AVX512DQVL-LABEL: test_broadcast_8i16_32i16:
; X64-AVX512DQVL: ## %bb.0:
-; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512DQVL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512DQVL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512DQVL-NEXT: retq
@@ -164,9 +162,8 @@ define <32 x i16> @test_broadcast_8i16_32i16(ptr%p) nounwind {
define <64 x i8> @test_broadcast_16i8_64i8(ptr%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512VL: ## %bb.0:
-; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512VL-NEXT: retq
@@ -179,9 +176,8 @@ define <64 x i8> @test_broadcast_16i8_64i8(ptr%p) nounwind {
;
; X64-AVX512DQVL-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512DQVL: ## %bb.0:
-; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512DQVL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512DQVL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512DQVL-NEXT: retq
``````````
https://github.com/llvm/llvm-project/pull/141963