[llvm] r322730 - [X86] Teach LowerBUILD_VECTOR to recognize pair-wise splats of 32-bit elements and use a 64-bit broadcast
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 17 10:58:22 PST 2018
Author: ctopper
Date: Wed Jan 17 10:58:22 2018
New Revision: 322730
URL: http://llvm.org/viewvc/llvm-project?rev=322730&view=rev
Log:
[X86] Teach LowerBUILD_VECTOR to recognize pair-wise splats of 32-bit elements and use a 64-bit broadcast
If we are splatting pairs of 32-bit elements, we can use a 64-bit broadcast to get the job done.
We could probably do this with other sizes too, for example four 16-bit elements. Or we could broadcast pairs of 16-bit elements using a 32-bit element broadcast. But I've left that as a future improvement.
I've also restricted this to AVX2, because plain AVX can only broadcast from memory (loads), not from a register.
Differential Revision: https://reviews.llvm.org/D42086
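For illustration only (this function is not part of the commit or its tests), here is a hypothetical IR pattern the change targets: a <4 x i32> build_vector that repeats the pair (%a, %b). With AVX2 the whole vector can now be materialized with a single 64-bit broadcast of the packed pair instead of a chain of vpinsrd instructions:

; Hypothetical example, not from this commit's test files.
; The even lanes all hold %a and the odd lanes all hold %b, so the vector is a
; splat of the 64-bit pair and can be lowered with vpbroadcastq on AVX2.
define <4 x i32> @splat_pair_i32(i32 %a, i32 %b) {
  %v0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %a, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %b, i32 3
  ret <4 x i32> %v3
}

The buildvector_v4f32_0404 test updated below shows the same idea for floats, where the AVX2 output now uses vmovddup to duplicate the 64-bit pair.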
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll
llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=322730&r1=322729&r2=322730&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 17 10:58:22 2018
@@ -8117,6 +8117,32 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
return LD;
}
+  // If this is a splat of pairs of 32-bit elements, we can use a narrower
+  // build_vector and broadcast it.
+  // TODO: We could probably generalize this more.
+  if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
+    SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
+                       DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+    auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef<SDValue> Ops) {
+      // Make sure all the even/odd operands match.
+      for (unsigned i = 2; i != NumElems; ++i)
+        if (Ops[i % 2] != Op.getOperand(i))
+          return false;
+      return true;
+    };
+    if (CanSplat(Op, NumElems, Ops)) {
+      MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64;
+      MVT NarrowVT = MVT::getVectorVT(EltVT, 4);
+      // Create a new build vector and cast to v2i64/v2f64.
+      SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2),
+                                     DAG.getBuildVector(NarrowVT, dl, Ops));
+      // Broadcast from v2i64/v2f64 and cast to final VT.
+      MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems/2);
+      return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT,
+                                            NewBV));
+    }
+  }
+
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
if (VT.getSizeInBits() > 128) {
Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=322730&r1=322729&r2=322730&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Wed Jan 17 10:58:22 2018
@@ -189,12 +189,7 @@ define <2 x i64> @Q64(i64* %ptr) nounwin
; X32-LABEL: Q64:
; X32: ## %bb.0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm0
-; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vpbroadcastq (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: Q64:
@@ -212,13 +207,8 @@ define <4 x i64> @QQ64(i64* %ptr) nounwi
; X32-LABEL: QQ64:
; X32: ## %bb.0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm0
-; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vbroadcastsd %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: QQ64:
@@ -1380,12 +1370,8 @@ define void @isel_crash_2q(i64* %cV_R.ad
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: vmovaps %xmm0, (%esp)
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm1
-; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
+; X32-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: vpbroadcastq %xmm1, %xmm1
; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: addl $60, %esp
@@ -1438,15 +1424,10 @@ define void @isel_crash_4q(i64* %cV_R.ad
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: vmovaps %ymm0, (%esp)
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm1
-; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
+; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: vbroadcastsd %xmm1, %ymm1
; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=322730&r1=322729&r2=322730&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Wed Jan 17 10:58:22 2018
@@ -485,16 +485,11 @@ entry:
define <8 x i64> @test_mm512_mask_set1_epi64(<8 x i64> %__O, i8 zeroext %__M, i64 %__A) {
; X32-LABEL: test_mm512_mask_set1_epi64:
; X32: # %bb.0: # %entry
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
-; X32-NEXT: vmovd %edx, %xmm1
-; X32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
+; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: kmovw %eax, %k1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm0 {%k1}
+; X32-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_set1_epi64:
@@ -513,16 +508,11 @@ entry:
define <8 x i64> @test_mm512_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) {
; X32-LABEL: test_mm512_maskz_set1_epi64:
; X32: # %bb.0: # %entry
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
-; X32-NEXT: vmovd %edx, %xmm0
-; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: kmovw %eax, %k1
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z}
+; X32-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_set1_epi64:
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll?rev=322730&r1=322729&r2=322730&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll Wed Jan 17 10:58:22 2018
@@ -797,16 +797,11 @@ entry:
define <4 x i64> @test_mm256_mask_set1_epi64(<4 x i64> %__O, i8 zeroext %__M, i64 %__A) {
; X32-LABEL: test_mm256_mask_set1_epi64:
; X32: # %bb.0: # %entry
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X32-NEXT: vmovd %ecx, %xmm1
-; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
-; X32-NEXT: kmovw %edx, %k1
-; X32-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1}
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_set1_epi64:
@@ -826,16 +821,11 @@ entry:
define <4 x i64> @test_mm256_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) {
; X32-LABEL: test_mm256_maskz_set1_epi64:
; X32: # %bb.0: # %entry
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X32-NEXT: vmovd %ecx, %xmm0
-; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: kmovw %edx, %k1
-; X32-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_set1_epi64:
Modified: llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll?rev=322730&r1=322729&r2=322730&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll (original)
+++ llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll Wed Jan 17 10:58:22 2018
@@ -122,9 +122,7 @@ define <8 x i64> @test_mm512_epi64(<8 x
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT: movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT: vmovd %eax, %xmm0
-; X86-AVX512VLCDBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
-; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X86-AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; X86-AVX512VLCDBW-NEXT: vpbroadcastq %xmm0, %zmm0
; X86-AVX512VLCDBW-NEXT: retl
entry:
%0 = icmp eq <8 x i32> %a, %b
@@ -160,8 +158,7 @@ define <4 x i64> @test_mm256_epi64(<8 x
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT: movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT: vmovd %eax, %xmm0
-; X86-AVX512VLCDBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
-; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X86-AVX512VLCDBW-NEXT: vpbroadcastq %xmm0, %ymm0
; X86-AVX512VLCDBW-NEXT: retl
entry:
%0 = icmp eq <8 x i32> %a, %b
Modified: llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll?rev=322730&r1=322729&r2=322730&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll Wed Jan 17 10:58:22 2018
@@ -103,14 +103,9 @@ define <8 x i64> @insert_subvector_into_
; X32_AVX256-NEXT: subl $8, %esp
; X32_AVX256-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX256-NEXT: vmovlps %xmm0, (%esp)
-; X32_AVX256-NEXT: movl (%esp), %eax
-; X32_AVX256-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32_AVX256-NEXT: vmovd %eax, %xmm0
-; X32_AVX256-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X32_AVX256-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
-; X32_AVX256-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
-; X32_AVX256-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32_AVX256-NEXT: vmovdqa %ymm0, %ymm1
+; X32_AVX256-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32_AVX256-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32_AVX256-NEXT: vmovaps %ymm0, %ymm1
; X32_AVX256-NEXT: movl %ebp, %esp
; X32_AVX256-NEXT: popl %ebp
; X32_AVX256-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=322730&r1=322729&r2=322730&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Wed Jan 17 10:58:22 2018
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X32 --check-prefix=X86AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=X32 --check-prefix=X86AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X64 --check-prefix=X64AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=X64 --check-prefix=X64AVX2
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
@@ -320,20 +320,35 @@ define <4 x i32> @combine_vpperm_10zz32B
; FIXME: Duplicated load in i686
define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
-; X32-LABEL: buildvector_v4f32_0404:
-; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
-; X32-NEXT: vmovaps %xmm0, (%eax)
-; X32-NEXT: retl
-;
-; X64-LABEL: buildvector_v4f32_0404:
-; X64: # %bb.0:
-; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0]
-; X64-NEXT: vmovaps %xmm0, (%rdi)
-; X64-NEXT: retq
+; X86AVX-LABEL: buildvector_v4f32_0404:
+; X86AVX: # %bb.0:
+; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; X86AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X86AVX-NEXT: vmovaps %xmm0, (%eax)
+; X86AVX-NEXT: retl
+;
+; X86AVX2-LABEL: buildvector_v4f32_0404:
+; X86AVX2: # %bb.0:
+; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X86AVX2-NEXT: vmovapd %xmm0, (%eax)
+; X86AVX2-NEXT: retl
+;
+; X64AVX-LABEL: buildvector_v4f32_0404:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0]
+; X64AVX-NEXT: vmovaps %xmm0, (%rdi)
+; X64AVX-NEXT: retq
+;
+; X64AVX2-LABEL: buildvector_v4f32_0404:
+; X64AVX2: # %bb.0:
+; X64AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; X64AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64AVX2-NEXT: vmovapd %xmm0, (%rdi)
+; X64AVX2-NEXT: retq
%v0 = insertelement <4 x float> undef, float %a, i32 0
%v1 = insertelement <4 x float> %v0, float %b, i32 1
%v2 = insertelement <4 x float> %v1, float %a, i32 2