[llvm] r313509 - [X86] Teach the execution domain fixing tables to use movlhps in place of unpcklpd for the packed single domain.

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Sun Sep 17 21:40:59 PDT 2017


Author: ctopper
Date: Sun Sep 17 21:40:58 2017
New Revision: 313509

URL: http://llvm.org/viewvc/llvm-project?rev=313509&view=rev
Log:
[X86] Teach the execution domain fixing tables to use movlhps in place of unpcklpd for the packed single domain.

MOVLHPS has a smaller encoding than UNPCKLPD in the legacy encodings: UNPCKLPD carries a mandatory 0x66 operand-size prefix that MOVLHPS does not, so the register-register form is one byte shorter. With the VEX and EVEX encodings the prefix is folded into the VEX/EVEX bytes, so the two are the same length and it doesn't matter.
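
For reference, the legacy register-register encodings (byte counts assume no REX prefix is needed):

  movlhps  %xmm1, %xmm0     # 0F 16 C1       (3 bytes)
  unpcklpd %xmm1, %xmm0     # 66 0F 14 C1    (4 bytes)

Both produce xmm0 = xmm0[0],xmm1[0]. MOVLHPS has no memory form with these semantics, which is why only the register-register (rr) table entries change below.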

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx-unpack.ll
    llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
    llvm/trunk/test/CodeGen/X86/avx512-shuffles/unpack.ll
    llvm/trunk/test/CodeGen/X86/build-vector-128.ll
    llvm/trunk/test/CodeGen/X86/build-vector-256.ll
    llvm/trunk/test/CodeGen/X86/build-vector-512.ll
    llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
    llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
    llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll
    llvm/trunk/test/CodeGen/X86/combine-or.ll
    llvm/trunk/test/CodeGen/X86/dagcombine-buildvector.ll
    llvm/trunk/test/CodeGen/X86/haddsub-2.ll
    llvm/trunk/test/CodeGen/X86/haddsub-undef.ll
    llvm/trunk/test/CodeGen/X86/half.ll
    llvm/trunk/test/CodeGen/X86/horizontal-shuffle.ll
    llvm/trunk/test/CodeGen/X86/i64-to-float.ll
    llvm/trunk/test/CodeGen/X86/masked_memop.ll
    llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
    llvm/trunk/test/CodeGen/X86/sse-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse2.ll
    llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll
    llvm/trunk/test/CodeGen/X86/sse41.ll
    llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-2.ll
    llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
    llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
    llvm/trunk/test/CodeGen/X86/vector-truncate-combine.ll
    llvm/trunk/test/CodeGen/X86/vselect.ll
    llvm/trunk/test/CodeGen/X86/widen_extract-1.ll
    llvm/trunk/test/CodeGen/X86/xop-mask-comments.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Sep 17 21:40:58 2017
@@ -9336,7 +9336,7 @@ static const uint16_t ReplaceableInstrs[
   { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },
   { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },
   { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
-  { X86::UNPCKLPDrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
+  { X86::MOVLHPSrr,  X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
   { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
   { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
   { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
@@ -9364,7 +9364,7 @@ static const uint16_t ReplaceableInstrs[
   { X86::VXORPSrm,   X86::VXORPDrm,   X86::VPXORrm    },
   { X86::VXORPSrr,   X86::VXORPDrr,   X86::VPXORrr    },
   { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
-  { X86::VUNPCKLPDrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
+  { X86::VMOVLHPSrr,  X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
   { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
   { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
   { X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
@@ -9438,7 +9438,7 @@ static const uint16_t ReplaceableInstrs[
   { X86::VUNPCKHPSZ256rm,    X86::VUNPCKHPSZ256rm,    X86::VPUNPCKHDQZ256rm },
   { X86::VUNPCKHPSZ256rr,    X86::VUNPCKHPSZ256rr,    X86::VPUNPCKHDQZ256rr },
   { X86::VUNPCKLPDZ128rm,    X86::VUNPCKLPDZ128rm,    X86::VPUNPCKLQDQZ128rm },
-  { X86::VUNPCKLPDZ128rr,    X86::VUNPCKLPDZ128rr,    X86::VPUNPCKLQDQZ128rr },
+  { X86::VMOVLHPSZrr,        X86::VUNPCKLPDZ128rr,    X86::VPUNPCKLQDQZ128rr },
   { X86::VUNPCKHPDZ128rm,    X86::VUNPCKHPDZ128rm,    X86::VPUNPCKHQDQZ128rm },
   { X86::VUNPCKHPDZ128rr,    X86::VUNPCKHPDZ128rr,    X86::VPUNPCKHQDQZ128rr },
   { X86::VUNPCKLPSZ128rm,    X86::VUNPCKLPSZ128rm,    X86::VPUNPCKLDQZ128rm },
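
Each row of these ReplaceableInstrs tables holds one operation's opcode in the three execution domains (packed single, packed double, packed integer), and the execution domain fix pass rewrites an instruction to the column of whichever domain it prefers; this patch only swaps the packed-single column of the rr rows and leaves the pass logic untouched. A minimal sketch of the lookup, with illustrative names (the real code is X86InstrInfo's getExecutionDomain/setExecutionDomain):

  #include <cstdint>

  // Simplified model of the domain-fixing table lookup; columns are
  // 0 = packed single, 1 = packed double, 2 = packed integer.
  static uint16_t lookupDomainOpcode(const uint16_t (*Table)[3],
                                     unsigned NumRows, uint16_t Opcode,
                                     unsigned Domain) {
    for (unsigned Row = 0; Row != NumRows; ++Row)
      for (unsigned Col = 0; Col != 3; ++Col)
        if (Table[Row][Col] == Opcode)
          // After this change, the packed-single (column 0) equivalent
          // of UNPCKLPDrr is MOVLHPSrr instead of UNPCKLPDrr itself.
          return Table[Row][Domain];
    return 0; // Instruction is not domain-replaceable.
  }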

Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Sun Sep 17 21:40:58 2017
@@ -2246,15 +2246,15 @@ define <4 x double> @test_mm256_set_pd(d
 ; X32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
 ; X32-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; X32-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X32-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X32-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_set_pd:
 ; X64:       # BB#0:
-; X64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0]
+; X64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0]
 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
   %res0 = insertelement <4 x double> undef, double %a3, i32 0
@@ -2883,15 +2883,15 @@ define <4 x double> @test_mm256_setr_pd(
 ; X32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
 ; X32-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; X32-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X32-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0]
+; X32-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X32-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0]
 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_setr_pd:
 ; X64:       # BB#0:
-; X64-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; X64-NEXT:    retq
   %res0 = insertelement <4 x double> undef, double %a0, i32 0

Modified: llvm/trunk/test/CodeGen/X86/avx-unpack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-unpack.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-unpack.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-unpack.ll Sun Sep 17 21:40:58 2017
@@ -52,7 +52,7 @@ define <4 x double> @unpacklopd_not(<4 x
 ; CHECK-LABEL: unpacklopd_not:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -76,7 +76,7 @@ define <4 x double> @unpackhipd_not(<4 x
 ; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>

Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Sun Sep 17 21:40:58 2017
@@ -25,25 +25,25 @@ define <8 x double> @sltof864(<8 x i64>
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; NODQ-NEXT:    vmovq %xmm1, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; NODQ-NEXT:    retq
@@ -64,12 +64,12 @@ define <4 x double> @slto4f64(<4 x i64>
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; NODQ-NEXT:    vmovq %xmm1, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; NODQ-NEXT:    retq
 ;
@@ -95,7 +95,7 @@ define <2 x double> @slto2f64(<2 x i64>
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; NODQ-NEXT:    retq
 ;
 ; VLDQ-LABEL: slto2f64:
@@ -425,25 +425,25 @@ define <8 x double> @ulto8f64(<8 x i64>
 ; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm2
 ; NODQ-NEXT:    vmovq %xmm1, %rax
 ; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; NODQ-NEXT:    retq
@@ -466,11 +466,11 @@ define <16 x double> @ulto16f64(<16 x i6
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
 ; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm4
 ; KNL-NEXT:    vpextrq $1, %xmm4, %rax
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
 ; KNL-NEXT:    vmovq %xmm4, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm3, %rax
@@ -478,11 +478,11 @@ define <16 x double> @ulto16f64(<16 x i6
 ; KNL-NEXT:    vmovq %xmm3, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm0, %rax
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT:    vmovq %xmm0, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm0
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; KNL-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; KNL-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm3, %rax
@@ -490,25 +490,25 @@ define <16 x double> @ulto16f64(<16 x i6
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm2
 ; KNL-NEXT:    vmovq %xmm3, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; KNL-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm3, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT:    vmovq %xmm3, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm3, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT:    vmovq %xmm3, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT:    vpextrq $1, %xmm1, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT:    vmovq %xmm1, %rax
 ; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm1
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; KNL-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; KNL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; KNL-NEXT:    retq
@@ -526,25 +526,25 @@ define <16 x double> @ulto16f64(<16 x i6
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
 ; VLNODQ-NEXT:    vmovq %xmm2, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; VLNODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; VLNODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm0, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm0
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; VLNODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; VLNODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; VLNODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
@@ -552,25 +552,25 @@ define <16 x double> @ulto16f64(<16 x i6
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
 ; VLNODQ-NEXT:    vmovq %xmm2, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm2
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; VLNODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; VLNODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT:    vpextrq $1, %xmm1, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm1, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm1
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; VLNODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; VLNODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; VLNODQ-NEXT:    retq
@@ -582,25 +582,25 @@ define <16 x double> @ulto16f64(<16 x i6
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
 ; AVX512BW-NEXT:    vmovq %xmm2, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm0
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
@@ -608,25 +608,25 @@ define <16 x double> @ulto16f64(<16 x i6
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
 ; AVX512BW-NEXT:    vmovq %xmm2, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm2
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT:    vpextrq $1, %xmm1, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm1, %rax
 ; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm1
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    retq
@@ -1354,25 +1354,25 @@ define <8 x double> @slto8f64(<8 x i64>
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; NODQ-NEXT:    vmovq %xmm1, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; NODQ-NEXT:    retq
@@ -1395,11 +1395,11 @@ define <16 x double> @slto16f64(<16 x i6
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
 ; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm4
 ; KNL-NEXT:    vpextrq $1, %xmm4, %rax
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
 ; KNL-NEXT:    vmovq %xmm4, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm3, %rax
@@ -1407,11 +1407,11 @@ define <16 x double> @slto16f64(<16 x i6
 ; KNL-NEXT:    vmovq %xmm3, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm0, %rax
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT:    vmovq %xmm0, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm0
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; KNL-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; KNL-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm3, %rax
@@ -1419,25 +1419,25 @@ define <16 x double> @slto16f64(<16 x i6
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm2
 ; KNL-NEXT:    vmovq %xmm3, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; KNL-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm3, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT:    vmovq %xmm3, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; KNL-NEXT:    vpextrq $1, %xmm3, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT:    vmovq %xmm3, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT:    vpextrq $1, %xmm1, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT:    vmovq %xmm1, %rax
 ; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm1
-; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; KNL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; KNL-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; KNL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; KNL-NEXT:    retq
@@ -1455,25 +1455,25 @@ define <16 x double> @slto16f64(<16 x i6
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
 ; VLNODQ-NEXT:    vmovq %xmm2, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; VLNODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; VLNODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm0, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm0
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; VLNODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; VLNODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; VLNODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
@@ -1481,25 +1481,25 @@ define <16 x double> @slto16f64(<16 x i6
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
 ; VLNODQ-NEXT:    vmovq %xmm2, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm2
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; VLNODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; VLNODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm3, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT:    vpextrq $1, %xmm1, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT:    vmovq %xmm1, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm1
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; VLNODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; VLNODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; VLNODQ-NEXT:    retq
@@ -1511,25 +1511,25 @@ define <16 x double> @slto16f64(<16 x i6
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
 ; AVX512BW-NEXT:    vmovq %xmm2, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm0
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
@@ -1537,25 +1537,25 @@ define <16 x double> @slto16f64(<16 x i6
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
 ; AVX512BW-NEXT:    vmovq %xmm2, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm2
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm3, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT:    vpextrq $1, %xmm1, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT:    vmovq %xmm1, %rax
 ; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm1
-; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    retq
@@ -2241,7 +2241,7 @@ define <2 x double> @sbto2f64(<2 x doubl
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm1
 ; VLNODQ-NEXT:    vmovq %xmm0, %rax
 ; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VLNODQ-NEXT:    retq
   %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
   %1 = sitofp <2 x i1> %cmpres to <2 x double>
@@ -2593,7 +2593,7 @@ define <2 x double> @ubto2f64(<2 x i32>
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm1
 ; VLNODQ-NEXT:    vmovq %xmm0, %rax
 ; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm0
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VLNODQ-NEXT:    retq
   %mask = icmp ult <2 x i32> %a, zeroinitializer
   %1 = uitofp <2 x i1> %mask to <2 x double>

Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Sun Sep 17 21:40:58 2017
@@ -882,7 +882,7 @@ define <16 x i32> @test_insert_128_v16i3
 define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
 ; CHECK-LABEL: test_insert_128_v8f64:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %r = insertelement <8 x double> %x, double %y, i32 1

Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Sun Sep 17 21:40:58 2017
@@ -1906,7 +1906,7 @@ define <2 x i64> @test_4xi64_to_2xi64_pe
 ; CHECK-LABEL: test_4xi64_to_2xi64_perm_mask0:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <2 x i32> <i32 2, i32 0>
@@ -2313,7 +2313,7 @@ define <2 x i64> @test_8xi64_to_2xi64_pe
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 3, i32 0>
@@ -2684,7 +2684,7 @@ define <2 x i64> @test_8xi64_to_2xi64_pe
 ; CHECK-NEXT:    vmovaps (%rdi), %zmm0
 ; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %vec = load <8 x i64>, <8 x i64>* %vp
@@ -3703,7 +3703,7 @@ define <2 x double> @test_4xdouble_to_2x
 ; CHECK-LABEL: test_4xdouble_to_2xdouble_perm_mask0:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %res = shufflevector <4 x double> %vec, <4 x double> undef, <2 x i32> <i32 2, i32 0>

Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/unpack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/unpack.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/unpack.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/unpack.ll Sun Sep 17 21:40:58 2017
@@ -718,7 +718,7 @@ define <16 x float> @test_16xfloat_zero_
 define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) {
 ; CHECK-LABEL: test_2xdouble_unpack_low_mask0:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
   %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
   ret <2 x double> %res

Modified: llvm/trunk/test/CodeGen/X86/build-vector-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/build-vector-128.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/build-vector-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/build-vector-128.ll Sun Sep 17 21:40:58 2017
@@ -16,7 +16,7 @@ define <2 x double> @test_buildvector_v2
 ;
 ; SSE-64-LABEL: test_buildvector_v2f64:
 ; SSE-64:       # BB#0:
-; SSE-64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-64-NEXT:    retq
 ;
 ; AVX-32-LABEL: test_buildvector_v2f64:
@@ -26,7 +26,7 @@ define <2 x double> @test_buildvector_v2
 ;
 ; AVX-64-LABEL: test_buildvector_v2f64:
 ; AVX-64:       # BB#0:
-; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-64-NEXT:    retq
   %ins0 = insertelement <2 x double> undef, double %a0, i32 0
   %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1
@@ -43,7 +43,7 @@ define <4 x float> @test_buildvector_v4f
 ; SSE2-64:       # BB#0:
 ; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-64-NEXT:    retq
 ;
 ; SSE41-64-LABEL: test_buildvector_v4f32:

Modified: llvm/trunk/test/CodeGen/X86/build-vector-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/build-vector-256.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/build-vector-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/build-vector-256.ll Sun Sep 17 21:40:58 2017
@@ -12,8 +12,8 @@ define <4 x double> @test_buildvector_v4
 ;
 ; AVX-64-LABEL: test_buildvector_v4f64:
 ; AVX-64:       # BB#0:
-; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX-64-NEXT:    retq
   %ins0 = insertelement <4 x double> undef, double %a0, i32 0

Modified: llvm/trunk/test/CodeGen/X86/build-vector-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/build-vector-512.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/build-vector-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/build-vector-512.ll Sun Sep 17 21:40:58 2017
@@ -12,11 +12,11 @@ define <8 x double> @test_buildvector_v8
 ;
 ; AVX-64-LABEL: test_buildvector_v8f64:
 ; AVX-64:       # BB#0:
-; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
-; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
 ; AVX-64-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
-; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX-64-NEXT:    vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
 ; AVX-64-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll Sun Sep 17 21:40:58 2017
@@ -43,7 +43,7 @@ define <4 x float> @test_negative_zero_1
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_negative_zero_1:
@@ -77,7 +77,7 @@ define <4 x float> @test_buildvector_v4f
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_register:
@@ -102,7 +102,7 @@ define <4 x float> @test_buildvector_v4f
 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_load:
@@ -129,7 +129,7 @@ define <4 x float> @test_buildvector_v4f
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_partial_load:

Modified: llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll Sun Sep 17 21:40:58 2017
@@ -855,7 +855,7 @@ define <16 x i8> @_clearupper16xi8b(<16
 ; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    popq %rbx
 ; SSE-NEXT:    popq %r14
 ; SSE-NEXT:    retq
@@ -1033,7 +1033,7 @@ define <32 x i8> @_clearupper32xi8b(<32
 ; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE-NEXT:    popq %rbx
 ; SSE-NEXT:    popq %r14
 ; SSE-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll Sun Sep 17 21:40:58 2017
@@ -245,7 +245,7 @@ define <4 x double> @combine_vec_fcopysi
 ; SSE-NEXT:    cvtss2sd %xmm5, %xmm4
 ; SSE-NEXT:    andps %xmm8, %xmm4
 ; SSE-NEXT:    orps %xmm0, %xmm4
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm4[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    andps %xmm7, %xmm0
@@ -257,7 +257,7 @@ define <4 x double> @combine_vec_fcopysi
 ; SSE-NEXT:    cvtss2sd %xmm6, %xmm0
 ; SSE-NEXT:    andps %xmm8, %xmm0
 ; SSE-NEXT:    orps %xmm0, %xmm1
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/combine-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-or.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-or.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-or.ll Sun Sep 17 21:40:58 2017
@@ -182,7 +182,7 @@ define <4 x i32> @test13(<4 x i32> %a, <
 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test14:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
@@ -207,7 +207,7 @@ define <4 x i32> @test15(<4 x i32> %a, <
 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; CHECK-NEXT:    movaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>

Modified: llvm/trunk/test/CodeGen/X86/dagcombine-buildvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dagcombine-buildvector.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-buildvector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dagcombine-buildvector.ll Sun Sep 17 21:40:58 2017
@@ -8,7 +8,7 @@ define void @test(<2 x double>* %dst, <4
 ; CHECK-LABEL: test:
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    movaps %xmm0, (%eax)
 ; CHECK-NEXT:    retl
 entry:

Modified: llvm/trunk/test/CodeGen/X86/haddsub-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/haddsub-2.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/haddsub-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/haddsub-2.ll Sun Sep 17 21:40:58 2017
@@ -923,7 +923,7 @@ define <4 x float> @not_a_hsub_2(<4 x fl
 ; SSE-NEXT:    subss %xmm4, %xmm1
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: not_a_hsub_2:

Modified: llvm/trunk/test/CodeGen/X86/haddsub-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/haddsub-undef.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/haddsub-undef.ll (original)
+++ llvm/trunk/test/CodeGen/X86/haddsub-undef.ll Sun Sep 17 21:40:58 2017
@@ -171,7 +171,7 @@ define <4 x float> @test8_undef(<4 x flo
 ; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT:    addss %xmm2, %xmm0
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/half.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/half.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/half.ll (original)
+++ llvm/trunk/test/CodeGen/X86/half.ll Sun Sep 17 21:40:58 2017
@@ -402,7 +402,7 @@ define <4 x float> @test_extend32_vec4(<
 ; CHECK-LIBCALL-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
 ; CHECK-LIBCALL-NEXT:    unpcklps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
 ; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; CHECK-LIBCALL-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-LIBCALL-NEXT:    addq $48, %rsp
 ; CHECK-LIBCALL-NEXT:    popq %rbx
 ; CHECK-LIBCALL-NEXT:    retq
@@ -457,7 +457,7 @@ define <4 x float> @test_extend32_vec4(<
 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-I686-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-I686-NEXT:    addl $56, %esp
 ; CHECK-I686-NEXT:    popl %esi
 ; CHECK-I686-NEXT:    retl
@@ -487,14 +487,14 @@ define <4 x double> @test_extend64_vec4(
 ; CHECK-LIBCALL-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
 ; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
 ; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
-; CHECK-LIBCALL-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-LIBCALL-NEXT:    movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
 ; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
 ; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm2
 ; CHECK-LIBCALL-NEXT:    movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
 ; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
 ; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm1
-; CHECK-LIBCALL-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; CHECK-LIBCALL-NEXT:    addq $16, %rsp
 ; CHECK-LIBCALL-NEXT:    popq %rbx
 ; CHECK-LIBCALL-NEXT:    retq
@@ -515,10 +515,10 @@ define <4 x double> @test_extend64_vec4(
 ; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; BWON-F16C-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; BWON-F16C-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; BWON-F16C-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; BWON-F16C-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; BWON-F16C-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; BWON-F16C-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; BWON-F16C-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/horizontal-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-shuffle.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-shuffle.ll Sun Sep 17 21:40:58 2017
@@ -11,14 +11,14 @@ define <4 x float> @test_unpackl_fhadd_1
 ; X32:       ## BB#0:
 ; X32-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vhaddps %xmm3, %xmm2, %xmm1
-; X32-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_unpackl_fhadd_128:
 ; X64:       ## BB#0:
 ; X64-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vhaddps %xmm3, %xmm2, %xmm1
-; X64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
   %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a2, <4 x float> %a3)

Modified: llvm/trunk/test/CodeGen/X86/i64-to-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i64-to-float.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/i64-to-float.ll (original)
+++ llvm/trunk/test/CodeGen/X86/i64-to-float.ll Sun Sep 17 21:40:58 2017
@@ -258,7 +258,7 @@ define <2 x double> @clamp_sitofp_2i64_2
 ; X64-SSE-NEXT:    movq %xmm1, %rax
 ; X64-SSE-NEXT:    xorps %xmm1, %xmm1
 ; X64-SSE-NEXT:    cvtsi2sdq %rax, %xmm1
-; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: clamp_sitofp_2i64_2f64:
@@ -273,7 +273,7 @@ define <2 x double> @clamp_sitofp_2i64_2
 ; X64-AVX-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
 ; X64-AVX-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; X64-AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-AVX-NEXT:    retq
   %clo = icmp slt <2 x i64> %a, <i64 -255, i64 -255>
   %lo = select <2 x i1> %clo, <2 x i64> <i64 -255, i64 -255>, <2 x i64> %a

Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Sun Sep 17 21:40:58 2017
@@ -1126,7 +1126,7 @@ define <8 x double> @load_one_mask_bit_s
 ; AVX:       ## BB#0:
 ; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll Sun Sep 17 21:40:58 2017
@@ -270,7 +270,7 @@ define <4 x float> @merge_4f32_f32_012u(
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_012u:
@@ -319,7 +319,7 @@ define <4 x float> @merge_4f32_f32_019u(
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_019u:
@@ -904,14 +904,14 @@ define <2 x i64> @merge_2i64_i64_12_vola
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: merge_2i64_i64_12_volatile:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
 ;
 ; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
@@ -964,7 +964,7 @@ define <4 x float> @merge_4f32_f32_2345_
 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_2345_volatile:

Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Sun Sep 17 21:40:58 2017
@@ -1444,49 +1444,49 @@ define void @test_movhps(<4 x float> %a0
 define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
 ; GENERIC-LABEL: test_movlhps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; GENERIC-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movlhps:
 ; ATOM:       # BB#0:
-; ATOM-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; ATOM-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movlhps:
 ; SLM:       # BB#0:
-; SLM-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SLM-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movlhps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movlhps:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_movlhps:
 ; SKYLAKE:       # BB#0:
-; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movlhps:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
 ; BTVER2-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movlhps:
 ; ZNVER1:       # BB#0:
-; ZNVER1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; ZNVER1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
 ; ZNVER1-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>

Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll Sun Sep 17 21:40:58 2017
@@ -2262,7 +2262,7 @@ define <2 x i64> @test_mm_set_epi32(i32
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set_epi32:
@@ -2294,7 +2294,7 @@ define <2 x i64> @test_mm_set_epi64x(i64
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set_epi64x:
@@ -2313,12 +2313,12 @@ define <2 x double> @test_mm_set_pd(doub
 ; X32:       # BB#0:
 ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set_pd:
 ; X64:       # BB#0:
-; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; X64-NEXT:    movaps %xmm1, %xmm0
 ; X64-NEXT:    retq
   %res0  = insertelement <2 x double> undef, double %a1, i32 0
@@ -2671,7 +2671,7 @@ define <2 x i64> @test_mm_setr_epi32(i32
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_setr_epi32:
@@ -2703,7 +2703,7 @@ define <2 x i64> @test_mm_setr_epi64x(i6
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_setr_epi64x:
@@ -2722,12 +2722,12 @@ define <2 x double> @test_mm_setr_pd(dou
 ; X32:       # BB#0:
 ; X32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_setr_pd:
 ; X64:       # BB#0:
-; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %res0  = insertelement <2 x double> undef, double %a0, i32 0
   %res1  = insertelement <2 x double> %res0, double %a1, i32 1
@@ -3839,12 +3839,12 @@ define <2 x i64> @test_mm_unpacklo_epi32
 define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_unpacklo_epi64:
 ; X32:       # BB#0:
-; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_unpacklo_epi64:
 ; X64:       # BB#0:
-; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %res
@@ -3853,12 +3853,12 @@ define <2 x i64> @test_mm_unpacklo_epi64
 define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
 ; X32-LABEL: test_mm_unpacklo_pd:
 ; X32:       # BB#0:
-; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_unpacklo_pd:
 ; X64:       # BB#0:
-; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
   ret <2 x double> %res

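Note that only register-register shuffles are rewritten in these tests;
MOVLHPS has no m128 source form, so memory-source unpacks keep the unpck
spelling. For the register form, the legacy encodings differ by exactly the
mandatory 66 prefix on the packed-double instruction (byte sequences per the
SSE opcode maps):

    movlhps  %xmm1, %xmm0        0F 16 C1       (3 bytes)
    unpcklpd %xmm1, %xmm0        66 0F 14 C1    (4 bytes)
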
Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Sun Sep 17 21:40:58 2017
@@ -2863,13 +2863,13 @@ define void @test_movsd_mem(double* %a0,
 define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
 ; GENERIC-LABEL: test_movsd_reg:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; GENERIC-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; GENERIC-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movsd_reg:
 ; ATOM:       # BB#0:
-; ATOM-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; ATOM-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
@@ -2879,33 +2879,33 @@ define <2 x double> @test_movsd_reg(<2 x
 ;
 ; SLM-LABEL: test_movsd_reg:
 ; SLM:       # BB#0:
-; SLM-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SLM-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movsd_reg:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SANDY-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movsd_reg:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; HASWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_movsd_reg:
 ; SKYLAKE:       # BB#0:
-; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKYLAKE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movsd_reg:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
+; BTVER2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movsd_reg:
 ; ZNVER1:       # BB#0:
-; ZNVER1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
+; ZNVER1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0>
   ret <2 x double> %1

Modified: llvm/trunk/test/CodeGen/X86/sse2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2.ll Sun Sep 17 21:40:58 2017
@@ -40,7 +40,7 @@ define void @test2(<2 x double>* %r, <2
 ; X64-LABEL: test2:
 ; X64:       # BB#0:
 ; X64-NEXT:    movaps (%rsi), %xmm1
-; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; X64-NEXT:    movaps %xmm1, (%rdi)
 ; X64-NEXT:    retq
 	%tmp3 = load <2 x double>, <2 x double>* %A, align 16
@@ -206,7 +206,7 @@ define <4 x float> @test9(i32 %dummy, fl
 ; X64:       # BB#0:
 ; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT:    retq
 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
@@ -225,7 +225,7 @@ define <4 x float> @test10(float %a, flo
 ; X64:       # BB#0:
 ; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT:    retq
 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
@@ -242,7 +242,7 @@ define <2 x double> @test11(double %a, d
 ;
 ; X64-LABEL: test11:
 ; X64:       # BB#0:
-; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
 	%tmp = insertelement <2 x double> undef, double %a, i32 0		; <<2 x double>> [#uses=1]
 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
@@ -315,7 +315,7 @@ define <4 x float> @test14(<4 x float>*
 ; X86-NEXT:    movaps %xmm2, %xmm0
 ; X86-NEXT:    addps %xmm1, %xmm0
 ; X86-NEXT:    subps %xmm1, %xmm2
-; X86-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X86-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test14:
@@ -325,7 +325,7 @@ define <4 x float> @test14(<4 x float>*
 ; X64-NEXT:    movaps %xmm2, %xmm0
 ; X64-NEXT:    addps %xmm1, %xmm0
 ; X64-NEXT:    subps %xmm1, %xmm2
-; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT:    retq
   %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=2]
   %tmp5 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=2]

Modified: llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll Sun Sep 17 21:40:58 2017
@@ -342,7 +342,7 @@ define <4 x float> @test14(<4 x float> %
 ; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    subss %xmm1, %xmm0
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -417,7 +417,7 @@ define <4 x float> @test16(<4 x float> %
 ; SSE-NEXT:    addss %xmm0, %xmm1
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Sun Sep 17 21:40:58 2017
@@ -935,14 +935,14 @@ define <4 x float> @insertps_with_undefs
 ; X32:       ## BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X32-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; X32-NEXT:    movaps %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: insertps_with_undefs:
 ; X64:       ## BB#0:
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; X64-NEXT:    movaps %xmm1, %xmm0
 ; X64-NEXT:    retq
   %1 = load float, float* %b, align 4

Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll Sun Sep 17 21:40:58 2017
@@ -2375,7 +2375,7 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2
 ; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
 ; SSE-NEXT:    retq
@@ -2388,7 +2388,7 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2
 ; AVX-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
 ; AVX-NEXT:    retq
   %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-2.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-2.ll Sun Sep 17 21:40:58 2017
@@ -46,7 +46,7 @@ define <2 x double> @t3(double %s, <2 x
 ;
 ; X64-LABEL: t3:
 ; X64:       # BB#0:
-; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; X64-NEXT:    movaps %xmm1, %xmm0
 ; X64-NEXT:    retq
   %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1

Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Sun Sep 17 21:40:58 2017
@@ -25,7 +25,7 @@ define <2 x double> @sitofp_2i64_to_2f64
 ; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -35,7 +35,7 @@ define <2 x double> @sitofp_2i64_to_2f64
 ; VEX-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; VEX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_2i64_to_2f64:
@@ -44,7 +44,7 @@ define <2 x double> @sitofp_2i64_to_2f64
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_2i64_to_2f64:
@@ -53,7 +53,7 @@ define <2 x double> @sitofp_2i64_to_2f64
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
@@ -223,14 +223,14 @@ define <4 x double> @sitofp_4i64_to_4f64
 ; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    cvtsi2sdq %rax, %xmm3
 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    movaps %xmm3, %xmm1
 ; SSE-NEXT:    retq
@@ -242,12 +242,12 @@ define <4 x double> @sitofp_4i64_to_4f64
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovq %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; AVX1-NEXT:    vmovq %xmm0, %rax
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -258,12 +258,12 @@ define <4 x double> @sitofp_4i64_to_4f64
 ; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX2-NEXT:    vmovq %xmm1, %rax
 ; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; AVX2-NEXT:    vmovq %xmm0, %rax
 ; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -274,12 +274,12 @@ define <4 x double> @sitofp_4i64_to_4f64
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm1, %rax
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -290,12 +290,12 @@ define <4 x double> @sitofp_4i64_to_4f64
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm1, %rax
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
@@ -478,7 +478,7 @@ define <2 x double> @uitofp_2i64_to_2f64
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: uitofp_2i64_to_2f64:
@@ -487,7 +487,7 @@ define <2 x double> @uitofp_2i64_to_2f64
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
@@ -796,12 +796,12 @@ define <4 x double> @uitofp_4i64_to_4f64
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm1, %rax
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm1
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -812,12 +812,12 @@ define <4 x double> @uitofp_4i64_to_4f64
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm1, %rax
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm1
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
@@ -1362,7 +1362,7 @@ define <4 x float> @sitofp_4i64_to_4f32(
 ; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -2189,7 +2189,7 @@ define <4 x float> @uitofp_4i64_to_4f32(
 ; SSE-NEXT:    addss %xmm0, %xmm0
 ; SSE-NEXT:  .LBB47_12:
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -2577,7 +2577,7 @@ define <2 x double> @sitofp_load_2i64_to
 ; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2sdq %rax, %xmm1
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; VEX-LABEL: sitofp_load_2i64_to_2f64:
@@ -2587,7 +2587,7 @@ define <2 x double> @sitofp_load_2i64_to
 ; VEX-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; VEX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
@@ -2597,7 +2597,7 @@ define <2 x double> @sitofp_load_2i64_to
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
@@ -2607,7 +2607,7 @@ define <2 x double> @sitofp_load_2i64_to
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
@@ -2719,7 +2719,7 @@ define <4 x double> @sitofp_load_4i64_to
 ; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2sdq %rax, %xmm1
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    movq %xmm2, %rax
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2sdq %rax, %xmm1
@@ -2727,7 +2727,7 @@ define <4 x double> @sitofp_load_4i64_to
 ; SSE-NEXT:    movq %xmm2, %rax
 ; SSE-NEXT:    xorps %xmm2, %xmm2
 ; SSE-NEXT:    cvtsi2sdq %rax, %xmm2
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: sitofp_load_4i64_to_4f64:
@@ -2738,12 +2738,12 @@ define <4 x double> @sitofp_load_4i64_to
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovq %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; AVX1-NEXT:    vmovq %xmm0, %rax
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -2755,12 +2755,12 @@ define <4 x double> @sitofp_load_4i64_to
 ; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX2-NEXT:    vmovq %xmm1, %rax
 ; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; AVX2-NEXT:    vmovq %xmm0, %rax
 ; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -2772,12 +2772,12 @@ define <4 x double> @sitofp_load_4i64_to
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm1, %rax
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -2789,12 +2789,12 @@ define <4 x double> @sitofp_load_4i64_to
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm1, %rax
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
@@ -2917,7 +2917,7 @@ define <2 x double> @uitofp_load_2i64_to
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
@@ -2927,7 +2927,7 @@ define <2 x double> @uitofp_load_2i64_to
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64:
@@ -3138,12 +3138,12 @@ define <4 x double> @uitofp_load_4i64_to
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm1, %rax
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm1
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -3155,12 +3155,12 @@ define <4 x double> @uitofp_load_4i64_to
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm1, %rax
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm1
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
@@ -3323,7 +3323,7 @@ define <4 x float> @sitofp_load_4i64_to_
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: sitofp_load_4i64_to_4f32:
@@ -3491,7 +3491,7 @@ define <8 x float> @sitofp_load_8i64_to_
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; SSE-NEXT:    movq %xmm3, %rax
 ; SSE-NEXT:    xorps %xmm4, %xmm4
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm4
@@ -3508,7 +3508,7 @@ define <8 x float> @sitofp_load_8i64_to_
 ; SSE-NEXT:    xorps %xmm2, %xmm2
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: sitofp_load_8i64_to_8f32:
@@ -3816,7 +3816,7 @@ define <4 x float> @uitofp_load_4i64_to_
 ; SSE-NEXT:    addss %xmm2, %xmm2
 ; SSE-NEXT:  .LBB76_12:
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: uitofp_load_4i64_to_4f32:
@@ -4227,7 +4227,7 @@ define <8 x float> @uitofp_load_8i64_to_
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
 ; SSE-NEXT:    addss %xmm1, %xmm1
 ; SSE-NEXT:  .LBB80_21:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
 ; SSE-NEXT:    movq %xmm2, %rax
@@ -4247,7 +4247,7 @@ define <8 x float> @uitofp_load_8i64_to_
 ; SSE-NEXT:    addss %xmm2, %xmm2
 ; SSE-NEXT:  .LBB80_24:
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: uitofp_load_8i64_to_8f32:

Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Sun Sep 17 21:40:58 2017
@@ -1570,7 +1570,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2
 ; AVX1-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: cvt_2i16_to_2f64:
@@ -1587,7 +1587,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2
 ; AVX2-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: cvt_2i16_to_2f64:
@@ -1604,7 +1604,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2
 ; AVX512F-NEXT:    vcvtph2ps %ymm1, %zmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
@@ -1621,7 +1621,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2
 ; AVX512VL-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512VL-NEXT:    retq
   %1 = bitcast <2 x i16> %a0 to <2 x half>
   %2 = fpext <2 x half> %1 to <2 x double>
@@ -1652,10 +1652,10 @@ define <4 x double> @cvt_4i16_to_4f64(<4
 ; AVX1-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX1-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -1682,10 +1682,10 @@ define <4 x double> @cvt_4i16_to_4f64(<4
 ; AVX2-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX2-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -1712,10 +1712,10 @@ define <4 x double> @cvt_4i16_to_4f64(<4
 ; AVX512F-NEXT:    vcvtph2ps %ymm3, %zmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -1742,10 +1742,10 @@ define <4 x double> @cvt_4i16_to_4f64(<4
 ; AVX512VL-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
   %1 = bitcast <4 x i16> %a0 to <4 x half>
@@ -1766,7 +1766,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8
 ; AVX1-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: cvt_8i16_to_2f64:
@@ -1781,7 +1781,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8
 ; AVX2-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: cvt_8i16_to_2f64:
@@ -1796,7 +1796,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8
 ; AVX512F-NEXT:    vcvtph2ps %ymm1, %zmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
@@ -1814,7 +1814,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8
 ; AVX512VL-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512VL-NEXT:    retq
   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
   %2 = bitcast <2 x i16> %1 to <2 x half>
@@ -1845,10 +1845,10 @@ define <4 x double> @cvt_8i16_to_4f64(<8
 ; AVX1-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX1-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -1874,10 +1874,10 @@ define <4 x double> @cvt_8i16_to_4f64(<8
 ; AVX2-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX2-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -1903,10 +1903,10 @@ define <4 x double> @cvt_8i16_to_4f64(<8
 ; AVX512F-NEXT:    vcvtph2ps %ymm3, %zmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -1934,10 +1934,10 @@ define <4 x double> @cvt_8i16_to_4f64(<8
 ; AVX512VL-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1987,17 +1987,17 @@ define <8 x double> @cvt_8i16_to_8f64(<8
 ; AVX1-NEXT:    vcvtph2ps %xmm7, %xmm7
 ; AVX1-NEXT:    vcvtss2sd %xmm7, %xmm7, %xmm7
 ; AVX1-NEXT:    vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
 ; AVX1-NEXT:    vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm5[0],xmm0[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
 ; AVX1-NEXT:    vcvtss2sd %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX1-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
@@ -2041,17 +2041,17 @@ define <8 x double> @cvt_8i16_to_8f64(<8
 ; AVX2-NEXT:    vcvtph2ps %xmm7, %xmm7
 ; AVX2-NEXT:    vcvtss2sd %xmm7, %xmm7, %xmm7
 ; AVX2-NEXT:    vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
 ; AVX2-NEXT:    vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm5[0],xmm0[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
 ; AVX2-NEXT:    vcvtss2sd %xmm4, %xmm4, %xmm4
 ; AVX2-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX2-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
@@ -2095,17 +2095,17 @@ define <8 x double> @cvt_8i16_to_8f64(<8
 ; AVX512F-NEXT:    vcvtph2ps %ymm7, %zmm7
 ; AVX512F-NEXT:    vcvtss2sd %xmm7, %xmm7, %xmm7
 ; AVX512F-NEXT:    vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
 ; AVX512F-NEXT:    vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX512F-NEXT:    vcvtss2sd %xmm4, %xmm4, %xmm4
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm4 = xmm5[0],xmm4[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
 ; AVX512F-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
@@ -2150,17 +2150,17 @@ define <8 x double> @cvt_8i16_to_8f64(<8
 ; AVX512VL-NEXT:    vcvtph2ps %xmm7, %xmm7
 ; AVX512VL-NEXT:    vcvtss2sd %xmm7, %xmm7, %xmm7
 ; AVX512VL-NEXT:    vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
 ; AVX512VL-NEXT:    vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX512VL-NEXT:    vcvtss2sd %xmm4, %xmm4, %xmm4
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm4 = xmm5[0],xmm4[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
 ; AVX512VL-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
@@ -2223,7 +2223,7 @@ define <2 x double> @load_cvt_2i16_to_2f
 ; AVX1-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_cvt_2i16_to_2f64:
@@ -2236,7 +2236,7 @@ define <2 x double> @load_cvt_2i16_to_2f
 ; AVX2-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_cvt_2i16_to_2f64:
@@ -2249,7 +2249,7 @@ define <2 x double> @load_cvt_2i16_to_2f
 ; AVX512F-NEXT:    vcvtph2ps %ymm1, %zmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
@@ -2263,7 +2263,7 @@ define <2 x double> @load_cvt_2i16_to_2f
 ; AVX512VL-NEXT:    vcvtph2ps %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    retq
   %1 = load <2 x i16>, <2 x i16>* %a0
   %2 = bitcast <2 x i16> %1 to <2 x half>
@@ -2288,10 +2288,10 @@ define <4 x double> @load_cvt_4i16_to_4f
 ; AVX1-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX1-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -2311,10 +2311,10 @@ define <4 x double> @load_cvt_4i16_to_4f
 ; AVX2-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX2-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -2334,10 +2334,10 @@ define <4 x double> @load_cvt_4i16_to_4f
 ; AVX512F-NEXT:    vcvtph2ps %ymm3, %zmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -2357,10 +2357,10 @@ define <4 x double> @load_cvt_4i16_to_4f
 ; AVX512VL-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
   %1 = load <4 x i16>, <4 x i16>* %a0
@@ -2392,10 +2392,10 @@ define <4 x double> @load_cvt_8i16_to_4f
 ; AVX1-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX1-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -2421,10 +2421,10 @@ define <4 x double> @load_cvt_8i16_to_4f
 ; AVX2-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX2-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -2450,10 +2450,10 @@ define <4 x double> @load_cvt_8i16_to_4f
 ; AVX512F-NEXT:    vcvtph2ps %ymm3, %zmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -2481,10 +2481,10 @@ define <4 x double> @load_cvt_8i16_to_4f
 ; AVX512VL-NEXT:    vcvtph2ps %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
   %1 = load <8 x i16>, <8 x i16>* %a0
@@ -2523,17 +2523,17 @@ define <8 x double> @load_cvt_8i16_to_8f
 ; AVX1-NEXT:    vcvtph2ps %xmm7, %xmm7
 ; AVX1-NEXT:    vcvtss2sd %xmm7, %xmm7, %xmm7
 ; AVX1-NEXT:    vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
 ; AVX1-NEXT:    vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm5[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
 ; AVX1-NEXT:    vcvtss2sd %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX1-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
@@ -2565,17 +2565,17 @@ define <8 x double> @load_cvt_8i16_to_8f
 ; AVX2-NEXT:    vcvtph2ps %xmm7, %xmm7
 ; AVX2-NEXT:    vcvtss2sd %xmm7, %xmm7, %xmm7
 ; AVX2-NEXT:    vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
 ; AVX2-NEXT:    vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm5[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
 ; AVX2-NEXT:    vcvtss2sd %xmm4, %xmm4, %xmm4
 ; AVX2-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX2-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
@@ -2607,17 +2607,17 @@ define <8 x double> @load_cvt_8i16_to_8f
 ; AVX512F-NEXT:    vcvtph2ps %ymm7, %zmm7
 ; AVX512F-NEXT:    vcvtss2sd %xmm7, %xmm7, %xmm7
 ; AVX512F-NEXT:    vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
 ; AVX512F-NEXT:    vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX512F-NEXT:    vcvtss2sd %xmm4, %xmm4, %xmm4
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
 ; AVX512F-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512F-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
@@ -2650,17 +2650,17 @@ define <8 x double> @load_cvt_8i16_to_8f
 ; AVX512VL-NEXT:    vcvtph2ps %xmm7, %xmm7
 ; AVX512VL-NEXT:    vcvtss2sd %xmm7, %xmm7, %xmm7
 ; AVX512VL-NEXT:    vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
 ; AVX512VL-NEXT:    vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX512VL-NEXT:    vcvtss2sd %xmm4, %xmm4, %xmm4
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
 ; AVX512VL-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Sun Sep 17 21:40:58 2017
@@ -303,12 +303,12 @@ define <2 x double> @shuffle_v2f64_21(<2
 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_02:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v2i64_02:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle
@@ -316,13 +316,13 @@ define <2 x i64> @shuffle_v2i64_02(<2 x
 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_02_copy:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v2i64_02_copy:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm2[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle
@@ -502,13 +502,13 @@ define <2 x i64> @shuffle_v2i64_13_copy(
 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_20:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v2i64_20:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   ret <2 x i64> %shuffle
@@ -516,13 +516,13 @@ define <2 x i64> @shuffle_v2i64_20(<2 x
 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_20_copy:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v2i64_20_copy:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   ret <2 x i64> %shuffle
@@ -832,20 +832,20 @@ define <2 x double> @shuffle_v2f64_z0(<2
 ; SSE-LABEL: shuffle_v2f64_z0:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    xorps %xmm1, %xmm1
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: shuffle_v2f64_z0:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v2f64_z0:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VL-LABEL: shuffle_v2f64_z0:
@@ -1155,13 +1155,13 @@ define <2 x i64> @insert_mem_hi_v2i64(i6
 ; SSE-LABEL: insert_mem_hi_v2i64:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: insert_mem_hi_v2i64:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %a = load i64, i64* %ptr
   %v = insertelement <2 x i64> undef, i64 %a, i32 0
@@ -1231,13 +1231,13 @@ define <2 x double> @insert_mem_lo_v2f64
 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
 ; SSE-LABEL: insert_reg_hi_v2f64:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: insert_reg_hi_v2f64:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT:    retq
   %v = insertelement <2 x double> undef, double %a, i32 0
   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Sun Sep 17 21:40:58 2017
@@ -303,12 +303,12 @@ define <4 x float> @shuffle_v4f32_1133(<
 define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: shuffle_v4f32_0145:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4f32_0145:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x float> %shuffle
@@ -499,12 +499,12 @@ define <4 x i32> @shuffle_v4i32_4012(<4
 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: shuffle_v4i32_0145:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4i32_0145:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x i32> %shuffle
@@ -554,13 +554,13 @@ define <4 x i32> @shuffle_v4i32_0451(<4
 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: shuffle_v4i32_4501:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4i32_4501:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x i32> %shuffle
@@ -1825,7 +1825,7 @@ define <4 x float> @shuffle_v4f32_bitcas
 ; AVX512VL-LABEL: shuffle_v4f32_bitcast_4401:
 ; AVX512VL:       # BB#0:
 ; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX512VL-NEXT:    retq
   %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
   %2 = bitcast <4 x i32> %1 to <2 x double>
@@ -2207,13 +2207,13 @@ define <4 x i32> @insert_mem_hi_v4i32(<2
 ; SSE-LABEL: insert_mem_hi_v4i32:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: insert_mem_hi_v4i32:
 ; AVX1OR2:       # BB#0:
 ; AVX1OR2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1OR2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1OR2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1OR2-NEXT:    retq
 ;
 ; AVX512VL-LABEL: insert_mem_hi_v4i32:
@@ -2285,13 +2285,13 @@ define <4 x float> @insert_mem_lo_v4f32(
 define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
 ; SSE-LABEL: insert_reg_hi_v4f32:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: insert_reg_hi_v4f32:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT:    retq
   %a.cast = bitcast double %a to <2 x float>
   %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Sun Sep 17 21:40:58 2017
@@ -430,7 +430,7 @@ define <4 x double> @shuffle_v4f64_0415(
 ; AVX1-LABEL: shuffle_v4f64_0415:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -614,7 +614,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4
 ; AVX1-LABEL: shuffle_v4i64_0020:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
@@ -869,7 +869,7 @@ define <4 x i64> @shuffle_v4i64_0451(<4
 ; AVX1-LABEL: shuffle_v4i64_0451:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -903,7 +903,7 @@ define <4 x i64> @shuffle_v4i64_4015(<4
 ; AVX1-LABEL: shuffle_v4i64_4015:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -1066,7 +1066,7 @@ define <4 x i64> @shuffle_v4i64_0415(<4
 ; AVX1-LABEL: shuffle_v4i64_0415:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll Sun Sep 17 21:40:58 2017
@@ -1277,12 +1277,12 @@ define <4 x float> @combine_test2(<4 x f
 define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_test3:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_test3:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
@@ -1381,12 +1381,12 @@ define <4 x i32> @combine_test7(<4 x i32
 define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: combine_test8:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_test8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
   %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
@@ -1480,12 +1480,12 @@ define <4 x float> @combine_test12(<4 x
 define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_test13:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_test13:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@@ -1578,12 +1578,12 @@ define <4 x i32> @combine_test17(<4 x i3
 define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: combine_test18:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_test18:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@@ -1641,7 +1641,7 @@ define <4 x i32> @combine_test21(<8 x i3
 ; SSE-LABEL: combine_test21:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE-NEXT:    movaps %xmm2, (%rdi)
 ; SSE-NEXT:    retq
@@ -1649,7 +1649,7 @@ define <4 x i32> @combine_test21(<8 x i3
 ; AVX-LABEL: combine_test21:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; AVX-NEXT:    vmovaps %xmm2, (%rdi)
 ; AVX-NEXT:    vzeroupper
@@ -2168,12 +2168,12 @@ define <4 x float> @combine_undef_input_
 define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_undef_input_test2:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_undef_input_test2:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
@@ -2183,12 +2183,12 @@ define <4 x float> @combine_undef_input_
 define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_undef_input_test3:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_undef_input_test3:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
@@ -2352,12 +2352,12 @@ define <4 x float> @combine_undef_input_
 define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_undef_input_test12:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_undef_input_test12:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
   %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
@@ -2367,12 +2367,12 @@ define <4 x float> @combine_undef_input_
 define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_undef_input_test13:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_undef_input_test13:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
   %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5>

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll Sun Sep 17 21:40:58 2017
@@ -44,7 +44,7 @@ define <2 x i64> @var_shuffle_v2i64_v2i6
 ; SSE-NEXT:    andl $1, %esi
 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: var_shuffle_v2i64_v2i64_xx_i64:
@@ -56,7 +56,7 @@ define <2 x i64> @var_shuffle_v2i64_v2i6
 ; AVX-NEXT:    andl $1, %esi
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT:    retq
   %x0 = extractelement <2 x i64> %x, i32 %i0
   %x1 = extractelement <2 x i64> %x, i32 %i1
@@ -83,7 +83,7 @@ define <4 x float> @var_shuffle_v4f32_v4
 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -103,7 +103,7 @@ define <4 x float> @var_shuffle_v4f32_v4
 ; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -168,7 +168,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i3
 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -188,7 +188,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i3
 ; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -739,7 +739,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i3
 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -759,7 +759,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i3
 ; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -1180,7 +1180,7 @@ define <4 x float> @var_shuffle_v4f32_v4
 ; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: var_shuffle_v4f32_v4f32_x0yx_i32:
@@ -1197,7 +1197,7 @@ define <4 x float> @var_shuffle_v4f32_v4
 ; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
   %x0 = extractelement <4 x float> %x, i32 %i0
   %x1 = extractelement <4 x float> %x, i32 %i1

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll Sun Sep 17 21:40:58 2017
@@ -104,10 +104,10 @@ define <4 x i64> @var_shuffle_v4i64_v4i6
 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; ALL-NEXT:    movq %rbp, %rsp
 ; ALL-NEXT:    popq %rbp
@@ -135,7 +135,7 @@ define <4 x i64> @var_shuffle_v4i64_v4i6
 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT:    vmovaps %xmm0, %xmm0
 ; ALL-NEXT:    movq %rbp, %rsp
 ; ALL-NEXT:    popq %rbp
@@ -161,10 +161,10 @@ define <4 x i64> @var_shuffle_v4i64_v2i6
 ; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; ALL-NEXT:    retq
   %x0 = extractelement <2 x i64> %x, i64 %i0
@@ -610,10 +610,10 @@ define <4 x i64> @mem_shuffle_v4i64_v4i6
 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; ALL-NEXT:    movq %rbp, %rsp
 ; ALL-NEXT:    popq %rbp
@@ -651,10 +651,10 @@ define <4 x i64> @mem_shuffle_v4i64_v2i6
 ; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; ALL-NEXT:    retq
   %p0  = getelementptr inbounds i64, i64* %i, i32 0

Modified: llvm/trunk/test/CodeGen/X86/vector-truncate-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-truncate-combine.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-truncate-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-truncate-combine.ll Sun Sep 17 21:40:58 2017
@@ -14,7 +14,7 @@
 ; NOTE: This operation is collapsed to a single truncate, so this test no longer covers
 ; what it originally intended to.
 
-; CHECK:      MOVLHPSrr
+; CHECK:      PUNPCKLQDQrr
 ; CHECK:      PSHUFHWri
 ; CHECK:      PACKUSWBrr
 ; CHECK:      PACKUSWBrr

Modified: llvm/trunk/test/CodeGen/X86/vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect.ll Sun Sep 17 21:40:58 2017
@@ -462,15 +462,15 @@ define <2 x i64> @test25(<2 x i64> %a, <
 define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
 ; SSE-LABEL: select_of_shuffles_0:
 ; SSE:       # BB#0:
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSE-NEXT:    subps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: select_of_shuffles_0:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>

Modified: llvm/trunk/test/CodeGen/X86/widen_extract-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_extract-1.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_extract-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_extract-1.ll Sun Sep 17 21:40:58 2017
@@ -14,7 +14,7 @@ define void @convert(<2 x double>* %dst.
 ;
 ; X64-LABEL: convert:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    movaps %xmm0, (%rdi)
 ; X64-NEXT:    retq
 entry:

Modified: llvm/trunk/test/CodeGen/X86/xop-mask-comments.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-mask-comments.ll?rev=313509&r1=313508&r2=313509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-mask-comments.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-mask-comments.ll Sun Sep 17 21:40:58 2017
@@ -101,13 +101,13 @@ define <2 x double> @vpermil2pd_21(<2 x
 ; X32-LABEL: vpermil2pd_21:
 ; X32:       # BB#0:
 ; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X32-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vpermil2pd_21:
 ; X64:       # BB#0:
 ; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; X64-NEXT:    retq
   %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 10, i64 1>, i8 2)
   ret <2 x double> %1

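[Editor's note: a minimal standalone C sketch, not part of this patch, illustrating why the test churn above is purely mechanical. All three instructions in the replacement row form dst = { low 64 bits of a, low 64 bits of b }; they differ only in encoding and execution domain. The intrinsic names below are the standard Intel ones from <xmmintrin.h>/<emmintrin.h>; the program itself is this note's own construction.]

  /* Demonstrates that movlhps, unpcklpd, and punpcklqdq produce
   * bit-identical results on the same inputs. */
  #include <emmintrin.h>  /* SSE2: _mm_unpacklo_pd, _mm_unpacklo_epi64 */
  #include <stdio.h>
  #include <string.h>

  int main(void) {
    const double a[2] = {1.0, 2.0}, b[2] = {3.0, 4.0};
    __m128d va = _mm_loadu_pd(a), vb = _mm_loadu_pd(b);

    /* movlhps: packed-single domain */
    __m128d r0 = _mm_castps_pd(
        _mm_movelh_ps(_mm_castpd_ps(va), _mm_castpd_ps(vb)));
    /* unpcklpd: packed-double domain */
    __m128d r1 = _mm_unpacklo_pd(va, vb);
    /* punpcklqdq: integer domain */
    __m128d r2 = _mm_castsi128_pd(
        _mm_unpacklo_epi64(_mm_castpd_si128(va), _mm_castpd_si128(vb)));

    /* All three hold {1.0, 3.0}; prints "identical: 1". */
    printf("identical: %d\n",
           memcmp(&r0, &r1, 16) == 0 && memcmp(&r1, &r2, 16) == 0);
    return 0;
  }

This equivalence also appears to account for the one hunk above that moves in the opposite direction (vector-truncate-combine.ll, MOVLHPSrr -> PUNPCKLQDQrr): with MOVLHPSrr now keyed in the replacement row, the domain-fixing pass can canonicalize it to the integer unpack in integer-domain code, where previously it had no entry and was left untouched.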