[llvm] 5f8cf33 - [X86] IsElementEquivalent - add handling for X86ISD::VPERMI nodes. (#142767)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 05:47:48 PDT 2025
Author: Simon Pilgrim
Date: 2025-06-04T13:47:44+01:00
New Revision: 5f8cf33650c0c71119ecf81f07014329d9466d0d
URL: https://github.com/llvm/llvm-project/commit/5f8cf33650c0c71119ecf81f07014329d9466d0d
DIFF: https://github.com/llvm/llvm-project/commit/5f8cf33650c0c71119ecf81f07014329d9466d0d.diff
LOG: [X86] IsElementEquivalent - add handling for X86ISD::VPERMI nodes. (#142767)
On AVX2+ targets, X86ISD::VPERMI nodes are often used to splat subvectors.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2399936ffd827..24df848f87b9b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9830,6 +9830,17 @@ static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
return (Idx % NumMemElts) == (ExpectedIdx % NumMemElts);
}
break;
+ case X86ISD::VPERMI: {
+ if (Op == ExpectedOp && (int)VT.getVectorNumElements() == MaskSize) {
+ SmallVector<int, 8> Mask;
+ DecodeVPERMMask(MaskSize, Op.getConstantOperandVal(1), Mask);
+ SDValue Src = Op.getOperand(0);
+ return (Mask[Idx] == Mask[ExpectedIdx]) ||
+ IsElementEquivalent(MaskSize, Src, Src, Mask[Idx],
+ Mask[ExpectedIdx]);
+ }
+ break;
+ }
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index 1fada58f05ba9..7d2915ddc75b1 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -1952,7 +1952,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1965,7 +1965,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX512F-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX512F-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512F-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX512F-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1991,7 +1991,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX512DQ-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512DQ-SLOW-NEXT: vmovdqa (%rdi), %xmm1
; AVX512DQ-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX512DQ-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512DQ-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX512DQ-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX512DQ-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512DQ-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -2016,7 +2016,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX512BW-SLOW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-SLOW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
; AVX512BW-SLOW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-SLOW-NEXT: vpbroadcastq %xmm0, %ymm0
+; AVX512BW-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX512BW-SLOW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -4209,14 +4209,13 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX2-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd %xmm0, %xmm2
; AVX2-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
-; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb 32(%rdx), %ymm2, %ymm1
-; AVX2-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX2-NEXT: vmovdqa %ymm1, 32(%rcx)
+; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
+; AVX2-NEXT: vpaddb (%rdx), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa %ymm0, 32(%rcx)
+; AVX2-NEXT: vmovdqa %ymm1, (%rcx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index 7fcca526e460c..f5802150d5353 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1569,7 +1569,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX512F-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1578,7 +1578,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX512DQ-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1587,7 +1587,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX512BW-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512BW-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
diff --git a/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll b/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
index d9393ba9febb2..edd3933fcfc28 100644
--- a/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
+++ b/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
@@ -342,7 +342,7 @@ define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary(<
; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: retq
%r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
ret <4 x i64> %r
@@ -597,8 +597,8 @@ define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_unary(<8
define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-SLOW-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
; CHECK-SLOW: # %bb.0:
-; CHECK-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
-; CHECK-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
+; CHECK-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm1
+; CHECK-SLOW-NEXT: vbroadcastss %xmm1, %ymm1
; CHECK-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
; CHECK-SLOW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 7fbb211b69ccf..bc83cc1cab42d 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -1952,7 +1952,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1965,7 +1965,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX512F-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX512F-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512F-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX512F-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1991,7 +1991,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX512DQ-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512DQ-SLOW-NEXT: vmovdqa (%rdi), %xmm1
; AVX512DQ-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX512DQ-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512DQ-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX512DQ-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX512DQ-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512DQ-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -2016,7 +2016,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX512BW-SLOW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-SLOW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
; AVX512BW-SLOW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-SLOW-NEXT: vpbroadcastq %xmm0, %ymm0
+; AVX512BW-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX512BW-SLOW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -4649,11 +4649,10 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX2-SLOW-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX2-SLOW-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %ymm2
-; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
+; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
-; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-SLOW-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -4669,7 +4668,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX2-FAST-PERLANE-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX2-FAST-PERLANE-NEXT: vpbroadcastq %xmm0, %ymm2
+; AVX2-FAST-PERLANE-NEXT: vpbroadcastd %xmm0, %ymm2
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
@@ -4687,7 +4686,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX2-FAST-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX2-FAST-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpbroadcastq %xmm0, %ymm2
+; AVX2-FAST-NEXT: vpbroadcastd %xmm0, %ymm2
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
@@ -6582,7 +6581,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX2-NEXT: vpaddb 32(%rdx), %ymm0, %ymm1
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 45d589b6c988e..266b06a23df94 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1569,7 +1569,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX512F-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1578,7 +1578,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX512DQ-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1587,7 +1587,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX512BW-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512BW-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
More information about the llvm-commits mailing list