[llvm] 72a049d - [X86][AVX2] LowerINSERT_VECTOR_ELT - support v4i64 insertion as BLENDI(X, SCALAR_TO_VECTOR(Y))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 9 13:18:28 PDT 2022
Author: Simon Pilgrim
Date: 2022-06-09T21:18:10+01:00
New Revision: 72a049d77844d8da44613e4d9f109bd39e802602
URL: https://github.com/llvm/llvm-project/commit/72a049d77844d8da44613e4d9f109bd39e802602
DIFF: https://github.com/llvm/llvm-project/commit/72a049d77844d8da44613e4d9f109bd39e802602.diff
LOG: [X86][AVX2] LowerINSERT_VECTOR_ELT - support v4i64 insertion as BLENDI(X, SCALAR_TO_VECTOR(Y))
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx-insertelt.ll
llvm/test/CodeGen/X86/combine-mul.ll
llvm/test/CodeGen/X86/splat-for-size.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 89be04674c0b3..927484194990f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19773,7 +19773,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// and incur a domain crossing penalty if that's what we'll end up
// doing anyway after extracting to a 128-bit vector.
if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
- (Subtarget.hasAVX2() && EltVT == MVT::i32)) {
+ (Subtarget.hasAVX2() && (EltVT == MVT::i32 || EltVT == MVT::i64))) {
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
DAG.getTargetConstant(1, dl, MVT::i8));
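The change itself is the one-line predicate extension above: with AVX2, inserting a 64-bit element into a 256-bit vector now takes the same path as the existing 32-bit case, building SCALAR_TO_VECTOR(Y) and blending it into X with X86ISD::BLENDI (immediate 1 selects element 0 from the scalar operand). A minimal sketch of the effect (the function name here is illustrative; the in-tree coverage is insert_i64_firstelt_of_low_subvector in avx-insertelt.ll below):

define <4 x i64> @insert_i64_elt0(<4 x i64> %x, i64 %s) {
  %r = insertelement <4 x i64> %x, i64 %s, i32 0
  ret <4 x i64> %r
}

; AVX2 codegen before this patch:
;   vpinsrq $0, %rdi, %xmm0, %xmm1
;   vpblendd ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2 codegen after this patch:
;   vmovq %rdi, %xmm1
;   vpblendd ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]

vmovq has no dependency on the existing vector value, whereas vpinsrq must read its source xmm register; since there is no vpblendq, the v4i64 BLENDI is later widened to a vpblendd whose immediate selects the two low dwords from the scalar operand, as the updated test checks show.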
diff --git a/llvm/test/CodeGen/X86/avx-insertelt.ll b/llvm/test/CodeGen/X86/avx-insertelt.ll
index 1bca2df5d9ce9..23b1a0312394b 100644
--- a/llvm/test/CodeGen/X86/avx-insertelt.ll
+++ b/llvm/test/CodeGen/X86/avx-insertelt.ll
@@ -81,8 +81,8 @@ define <4 x i64> @insert_i64_firstelt_of_low_subvector(<4 x i64> %x, i64 %s) {
;
; AVX2-LABEL: insert_i64_firstelt_of_low_subvector:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: vmovq %rdi, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: retq
%i0 = insertelement <4 x i64> %x, i64 %s, i32 0
ret <4 x i64> %i0
@@ -312,11 +312,9 @@ define <4 x i64> @insert_i64_firstelts(<4 x i64> %x, i64 %s) {
;
; AVX2-LABEL: insert_i64_firstelts:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vmovq %rdi, %xmm1
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX2-NEXT: retq
%i0 = insertelement <4 x i64> %x, i64 %s, i32 0
%i1 = insertelement <4 x i64> %i0, i64 %s, i32 2
@@ -532,7 +530,7 @@ define <4 x i64> @insert_i64_two_elts_of_low_subvector(<4 x i64> %x, i64 %s) {
;
; AVX2-LABEL: insert_i64_two_elts_of_low_subvector:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
+; AVX2-NEXT: vmovq %rdi, %xmm1
; AVX2-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll
index 0d0aff21550a5..da8ba36599142 100644
--- a/llvm/test/CodeGen/X86/combine-mul.ll
+++ b/llvm/test/CodeGen/X86/combine-mul.ll
@@ -477,8 +477,8 @@ define <4 x i64> @fuzz15429(<4 x i64> %InVec) {
; AVX: # %bb.0:
; AVX-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; AVX-NEXT: vpinsrq $0, %rax, %xmm0, %xmm1
-; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX-NEXT: retq
%mul = mul <4 x i64> %InVec, <i64 1, i64 2, i64 4, i64 8>
%I = insertelement <4 x i64> %mul, i64 9223372036854775807, i64 0
diff --git a/llvm/test/CodeGen/X86/splat-for-size.ll b/llvm/test/CodeGen/X86/splat-for-size.ll
index 4d986f67be21d..d22c5cc378e92 100644
--- a/llvm/test/CodeGen/X86/splat-for-size.ll
+++ b/llvm/test/CodeGen/X86/splat-for-size.ll
@@ -385,13 +385,20 @@ define <32 x i8> @splat_v32i8_pgso(<32 x i8> %x) !prof !14 {
@A = common dso_local global <3 x i64> zeroinitializer, align 32
define <8 x i64> @pr23259() #1 {
-; CHECK-LABEL: pr23259:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovaps A+16(%rip), %xmm0
-; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
-; CHECK-NEXT: retq
+; AVX-LABEL: pr23259:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovaps A+16(%rip), %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: pr23259:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vmovaps A+16(%rip), %xmm0
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2,3,4,5,6,7]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
+; AVX2-NEXT: retq
entry:
%0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
%1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index 0aff325a58960..4e661d8102355 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -140,10 +140,16 @@ define <4 x double> @demandedelts_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x
; X86-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
; X86-NEXT: retl
;
-; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd:
-; X64: # %bb.0:
-; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm1[0,0],ymm0[3],ymm1[3]
-; X64-NEXT: retq
+; X64-AVX-LABEL: demandedelts_vpermil2pd256_as_shufpd:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm1[0,0],ymm0[3],ymm1[3]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX2-LABEL: demandedelts_vpermil2pd256_as_shufpd:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
+; X64-AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
+; X64-AVX2-NEXT: retq
%res0 = insertelement <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i64 %a2, i32 0
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %res0, i8 0)
%res2 = shufflevector <4 x double> %res1, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>