[llvm] r328423 - [X86][SSE] Ensure we're testing both non-VEX/VEX variants of SSE instructions on AVX targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 24 07:51:52 PDT 2018


Author: rksimon
Date: Sat Mar 24 07:51:52 2018
New Revision: 328423

URL: http://llvm.org/viewvc/llvm-project?rev=328423&view=rev
Log:
[X86][SSE] Ensure we're testing both non-VEX/VEX variants of SSE instructions on AVX targets

Also ensure that the SSE schedule tests don't use later instruction sets, by explicitly disabling them with -mattr in the RUN lines.

Modified:
    llvm/trunk/test/CodeGen/X86/sse-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
    llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll

Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,15 +1,25 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
+
+; FIXME: we should really use -mattr=-sse2 here but some of the comparison tests don't work without access to legal <4 x i32> types.
 
 define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_addps:
@@ -30,42 +40,84 @@ define <4 x float> @test_addps(<4 x floa
 ; SLM-NEXT:    addps (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_addps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_addps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_addps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_addps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_addps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_addps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_addps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_addps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_addps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_addps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_addps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_addps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_addps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -96,42 +148,84 @@ define float @test_addss(float %a0, floa
 ; SLM-NEXT:    addss (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_addss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_addss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_addss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_addss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_addss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_addss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_addss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_addss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_addss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_addss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_addss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_addss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_addss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -166,42 +260,84 @@ define <4 x float> @test_andps(<4 x floa
 ; SLM-NEXT:    andps (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_andps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_andps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_andps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_andps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_andps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_andps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_andps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_andps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_andps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_andps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_andps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_andps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_andps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_andps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -240,42 +376,84 @@ define <4 x float> @test_andnotps(<4 x f
 ; SLM-NEXT:    andnps (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_andnotps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_andnotps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_andnotps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_andnotps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_andnotps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_andnotps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_andnotps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_andnotps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_andnotps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_andnotps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_andnotps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_andnotps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_andnotps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_andnotps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -315,6 +493,13 @@ define <4 x float> @test_cmpps(<4 x floa
 ; SLM-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cmpps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cmpps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -322,6 +507,13 @@ define <4 x float> @test_cmpps(<4 x floa
 ; SANDY-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cmpps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cmpps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -329,6 +521,13 @@ define <4 x float> @test_cmpps(<4 x floa
 ; HASWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cmpps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cmpps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -336,6 +535,13 @@ define <4 x float> @test_cmpps(<4 x floa
 ; BROADWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cmpps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cmpps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
@@ -343,14 +549,27 @@ define <4 x float> @test_cmpps(<4 x floa
 ; SKYLAKE-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cmpps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cmpps:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vcmpeqps (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; SKX-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cmpps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cmpps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
@@ -358,6 +577,13 @@ define <4 x float> @test_cmpps(<4 x floa
 ; BTVER2-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cmpps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cmpps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -392,42 +618,84 @@ define float @test_cmpss(float %a0, floa
 ; SLM-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cmpss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cmpss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cmpss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cmpss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cmpss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cmpss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cmpss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cmpss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cmpss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cmpss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cmpss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cmpss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cmpss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cmpss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -487,6 +755,20 @@ define i32 @test_comiss(<4 x float> %a0,
 ; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_comiss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_comiss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -501,6 +783,20 @@ define i32 @test_comiss(<4 x float> %a0,
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_comiss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_comiss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -515,6 +811,20 @@ define i32 @test_comiss(<4 x float> %a0,
 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_comiss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_comiss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -529,6 +839,20 @@ define i32 @test_comiss(<4 x float> %a0,
 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_comiss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_comiss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -543,6 +867,20 @@ define i32 @test_comiss(<4 x float> %a0,
 ; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_comiss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_comiss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -557,6 +895,20 @@ define i32 @test_comiss(<4 x float> %a0,
 ; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_comiss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_comiss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -571,6 +923,20 @@ define i32 @test_comiss(<4 x float> %a0,
 ; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_comiss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_comiss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -614,6 +980,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtsi2ss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
+; SANDY-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtsi2ss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
@@ -621,6 +994,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtsi2ss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtsi2ss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -628,6 +1008,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtsi2ss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtsi2ss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -635,6 +1022,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtsi2ss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtsi2ss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -642,6 +1036,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtsi2ss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtsi2ss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -649,6 +1050,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtsi2ss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtsi2ss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -656,6 +1064,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtsi2ss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtsi2ss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -691,6 +1106,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtsi2ssq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; SANDY-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtsi2ssq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
@@ -698,6 +1120,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtsi2ssq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; HASWELL-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtsi2ssq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
@@ -705,6 +1134,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtsi2ssq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtsi2ssq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
@@ -712,6 +1148,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtsi2ssq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtsi2ssq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
@@ -719,6 +1162,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtsi2ssq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
+; SKX-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtsi2ssq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
@@ -726,6 +1176,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtsi2ssq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtsi2ssq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -733,6 +1190,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtsi2ssq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtsi2ssq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -768,6 +1232,13 @@ define i32 @test_cvtss2si(float %a0, flo
 ; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtss2si:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtss2si:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtss2si %xmm0, %ecx # sched: [5:1.00]
@@ -775,6 +1246,13 @@ define i32 @test_cvtss2si(float %a0, flo
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtss2si:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtss2si:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtss2si %xmm0, %ecx # sched: [4:1.00]
@@ -782,6 +1260,13 @@ define i32 @test_cvtss2si(float %a0, flo
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtss2si:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtss2si:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtss2si (%rdi), %eax # sched: [9:1.00]
@@ -789,6 +1274,13 @@ define i32 @test_cvtss2si(float %a0, flo
 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtss2si:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtss2si:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtss2si %xmm0, %ecx # sched: [6:1.00]
@@ -796,6 +1288,13 @@ define i32 @test_cvtss2si(float %a0, flo
 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtss2si:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtss2si:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtss2si %xmm0, %ecx # sched: [6:1.00]
@@ -803,6 +1302,13 @@ define i32 @test_cvtss2si(float %a0, flo
 ; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtss2si:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtss2si:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtss2si (%rdi), %eax # sched: [8:1.00]
@@ -810,6 +1316,13 @@ define i32 @test_cvtss2si(float %a0, flo
 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtss2si:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtss2si:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtss2si (%rdi), %eax # sched: [12:1.00]
@@ -848,6 +1361,13 @@ define i64 @test_cvtss2siq(float %a0, fl
 ; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtss2siq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtss2siq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtss2si %xmm0, %rcx # sched: [5:1.00]
@@ -855,6 +1375,13 @@ define i64 @test_cvtss2siq(float %a0, fl
 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtss2siq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtss2siq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtss2si %xmm0, %rcx # sched: [4:1.00]
@@ -862,6 +1389,13 @@ define i64 @test_cvtss2siq(float %a0, fl
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtss2siq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtss2siq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtss2si (%rdi), %rax # sched: [9:1.00]
@@ -869,6 +1403,13 @@ define i64 @test_cvtss2siq(float %a0, fl
 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtss2siq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtss2siq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtss2si %xmm0, %rcx # sched: [6:1.00]
@@ -876,6 +1417,13 @@ define i64 @test_cvtss2siq(float %a0, fl
 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtss2siq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [11:1.00]
+; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtss2siq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtss2si %xmm0, %rcx # sched: [6:1.00]
@@ -883,6 +1431,13 @@ define i64 @test_cvtss2siq(float %a0, fl
 ; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtss2siq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtss2siq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtss2si (%rdi), %rax # sched: [8:1.00]
@@ -890,6 +1445,13 @@ define i64 @test_cvtss2siq(float %a0, fl
 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtss2siq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtss2siq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtss2si (%rdi), %rax # sched: [12:1.00]
@@ -928,6 +1490,13 @@ define i32 @test_cvttss2si(float %a0, fl
 ; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvttss2si:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvttss2si:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvttss2si %xmm0, %ecx # sched: [5:1.00]
@@ -935,6 +1504,13 @@ define i32 @test_cvttss2si(float %a0, fl
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvttss2si:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvttss2si:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvttss2si %xmm0, %ecx # sched: [4:1.00]
@@ -942,6 +1518,13 @@ define i32 @test_cvttss2si(float %a0, fl
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvttss2si:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvttss2si:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvttss2si (%rdi), %eax # sched: [9:1.00]
@@ -949,6 +1532,13 @@ define i32 @test_cvttss2si(float %a0, fl
 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvttss2si:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvttss2si:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvttss2si %xmm0, %ecx # sched: [7:1.00]
@@ -956,6 +1546,13 @@ define i32 @test_cvttss2si(float %a0, fl
 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvttss2si:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [7:1.00]
+; SKX-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvttss2si:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttss2si %xmm0, %ecx # sched: [7:1.00]
@@ -963,6 +1560,13 @@ define i32 @test_cvttss2si(float %a0, fl
 ; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvttss2si:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvttss2si:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvttss2si (%rdi), %eax # sched: [8:1.00]
@@ -970,6 +1574,13 @@ define i32 @test_cvttss2si(float %a0, fl
 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvttss2si:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvttss2si:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvttss2si (%rdi), %eax # sched: [12:1.00]
@@ -1005,6 +1616,13 @@ define i64 @test_cvttss2siq(float %a0, f
 ; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvttss2siq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvttss2siq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvttss2si %xmm0, %rcx # sched: [5:1.00]
@@ -1012,6 +1630,13 @@ define i64 @test_cvttss2siq(float %a0, f
 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvttss2siq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [10:1.00]
+; HASWELL-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvttss2siq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvttss2si %xmm0, %rcx # sched: [4:1.00]
@@ -1019,6 +1644,13 @@ define i64 @test_cvttss2siq(float %a0, f
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvttss2siq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvttss2siq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvttss2si (%rdi), %rax # sched: [9:1.00]
@@ -1026,6 +1658,13 @@ define i64 @test_cvttss2siq(float %a0, f
 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvttss2siq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [12:1.00]
+; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvttss2siq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvttss2si %xmm0, %rcx # sched: [7:1.00]
@@ -1033,6 +1672,13 @@ define i64 @test_cvttss2siq(float %a0, f
 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvttss2siq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [7:1.00]
+; SKX-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [12:1.00]
+; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvttss2siq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttss2si %xmm0, %rcx # sched: [7:1.00]
@@ -1040,6 +1686,13 @@ define i64 @test_cvttss2siq(float %a0, f
 ; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvttss2siq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvttss2siq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvttss2si (%rdi), %rax # sched: [8:1.00]
@@ -1047,6 +1700,13 @@ define i64 @test_cvttss2siq(float %a0, f
 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvttss2siq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvttss2siq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvttss2si (%rdi), %rax # sched: [12:1.00]
@@ -1079,42 +1739,84 @@ define <4 x float> @test_divps(<4 x floa
 ; SLM-NEXT:    divps (%rdi), %xmm0 # sched: [37:34.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_divps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [14:1.00]
+; SANDY-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [20:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_divps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
 ; SANDY-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_divps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [17:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_divps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
 ; HASWELL-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_divps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [11:1.00]
+; BROADWELL-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [16:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_divps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
 ; BROADWELL-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_divps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [17:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_divps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
 ; SKYLAKE-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_divps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [17:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_divps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
 ; SKX-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_divps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_divps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
 ; BTVER2-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_divps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_divps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -1145,42 +1847,84 @@ define float @test_divss(float %a0, floa
 ; SLM-NEXT:    divss (%rdi), %xmm0 # sched: [37:34.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_divss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [14:1.00]
+; SANDY-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [20:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_divss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
 ; SANDY-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_divss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [16:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_divss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
 ; HASWELL-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [18:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_divss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [11:1.00]
+; BROADWELL-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [16:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_divss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
 ; BROADWELL-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_divss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [16:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_divss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
 ; SKYLAKE-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_divss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [16:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_divss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
 ; SKX-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_divss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_divss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
 ; BTVER2-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_divss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_divss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -1211,42 +1955,84 @@ define void @test_ldmxcsr(i32 %a0) {
 ; SLM-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_ldmxcsr:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_ldmxcsr:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; SANDY-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_ldmxcsr:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_ldmxcsr:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
 ; HASWELL-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_ldmxcsr:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_ldmxcsr:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
 ; BROADWELL-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_ldmxcsr:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_ldmxcsr:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_ldmxcsr:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SKX-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_ldmxcsr:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
 ; SKX-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_ldmxcsr:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_ldmxcsr:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
 ; BTVER2-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_ldmxcsr:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_ldmxcsr:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
@@ -1279,42 +2065,84 @@ define <4 x float> @test_maxps(<4 x floa
 ; SLM-NEXT:    maxps (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_maxps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maxps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_maxps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_maxps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_maxps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_maxps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_maxps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_maxps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_maxps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_maxps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_maxps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_maxps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_maxps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_maxps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1346,42 +2174,84 @@ define <4 x float> @test_maxss(<4 x floa
 ; SLM-NEXT:    maxss (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_maxss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maxss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_maxss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_maxss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_maxss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_maxss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_maxss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_maxss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_maxss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_maxss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_maxss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_maxss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_maxss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_maxss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1413,42 +2283,84 @@ define <4 x float> @test_minps(<4 x floa
 ; SLM-NEXT:    minps (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_minps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_minps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_minps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_minps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_minps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_minps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_minps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_minps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_minps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_minps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_minps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_minps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_minps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_minps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1480,42 +2392,84 @@ define <4 x float> @test_minss(<4 x floa
 ; SLM-NEXT:    minss (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_minss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_minss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_minss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_minss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_minss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_minss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_minss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_minss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_minss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_minss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_minss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_minss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_minss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_minss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1550,6 +2504,13 @@ define void @test_movaps(<4 x float> *%a
 ; SLM-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movaps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movaps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
@@ -1557,6 +2518,13 @@ define void @test_movaps(<4 x float> *%a
 ; SANDY-NEXT:    vmovaps %xmm0, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movaps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movaps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
@@ -1564,6 +2532,13 @@ define void @test_movaps(<4 x float> *%a
 ; HASWELL-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movaps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movaps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovaps (%rdi), %xmm0 # sched: [5:0.50]
@@ -1571,6 +2546,13 @@ define void @test_movaps(<4 x float> *%a
 ; BROADWELL-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movaps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movaps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
@@ -1578,6 +2560,13 @@ define void @test_movaps(<4 x float> *%a
 ; SKYLAKE-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movaps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movaps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
@@ -1585,6 +2574,13 @@ define void @test_movaps(<4 x float> *%a
 ; SKX-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movaps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movaps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovaps (%rdi), %xmm0 # sched: [5:1.00]
@@ -1592,6 +2588,13 @@ define void @test_movaps(<4 x float> *%a
 ; BTVER2-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movaps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movaps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovaps (%rdi), %xmm0 # sched: [8:0.50]
@@ -1628,36 +2631,71 @@ define <4 x float> @test_movhlps(<4 x fl
 ; SLM-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movhlps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movhlps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movhlps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movhlps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movhlps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movhlps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movhlps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movhlps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movhlps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movhlps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movhlps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movhlps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movhlps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movhlps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
@@ -1689,9 +2727,18 @@ define void @test_movhps(<4 x float> %a0
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT:    pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
+; SLM-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SLM-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movhps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SANDY-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movhps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
@@ -1699,6 +2746,14 @@ define void @test_movhps(<4 x float> %a0
 ; SANDY-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movhps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movhps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1706,6 +2761,14 @@ define void @test_movhps(<4 x float> %a0
 ; HASWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movhps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movhps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1713,6 +2776,14 @@ define void @test_movhps(<4 x float> %a0
 ; BROADWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movhps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movhps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1720,6 +2791,14 @@ define void @test_movhps(<4 x float> %a0
 ; SKYLAKE-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movhps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKX-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SKX-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movhps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1727,6 +2806,14 @@ define void @test_movhps(<4 x float> %a0
 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movhps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movhps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1734,6 +2821,14 @@ define void @test_movhps(<4 x float> %a0
 ; BTVER2-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movhps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movhps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
@@ -1771,42 +2866,84 @@ define <4 x float> @test_movlhps(<4 x fl
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movlhps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movlhps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movlhps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movlhps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movlhps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movlhps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movlhps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movlhps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movlhps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movlhps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SKX-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movlhps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movlhps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
 ; BTVER2-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movlhps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movlhps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -1839,6 +2976,13 @@ define void @test_movlps(<4 x float> %a0
 ; SLM-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movlps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movlps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
@@ -1846,6 +2990,13 @@ define void @test_movlps(<4 x float> %a0
 ; SANDY-NEXT:    vmovlps %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movlps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movlps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1853,6 +3004,13 @@ define void @test_movlps(<4 x float> %a0
 ; HASWELL-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movlps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movlps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1860,6 +3018,13 @@ define void @test_movlps(<4 x float> %a0
 ; BROADWELL-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movlps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movlps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1867,6 +3032,13 @@ define void @test_movlps(<4 x float> %a0
 ; SKYLAKE-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movlps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKX-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movlps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1874,6 +3046,13 @@ define void @test_movlps(<4 x float> %a0
 ; SKX-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movlps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movlps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1881,6 +3060,13 @@ define void @test_movlps(<4 x float> %a0
 ; BTVER2-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movlps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movlps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
@@ -1915,36 +3101,71 @@ define i32 @test_movmskps(<4 x float> %a
 ; SLM-NEXT:    movmskps %xmm0, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movmskps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movmskps %xmm0, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movmskps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovmskps %xmm0, %eax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movmskps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movmskps %xmm0, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movmskps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovmskps %xmm0, %eax # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movmskps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movmskps %xmm0, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movmskps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovmskps %xmm0, %eax # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movmskps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movmskps %xmm0, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movmskps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovmskps %xmm0, %eax # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movmskps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movmskps %xmm0, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movmskps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovmskps %xmm0, %eax # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movmskps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movmskps %xmm0, %eax # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movmskps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovmskps %xmm0, %eax # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movmskps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movmskps %xmm0, %eax # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movmskps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovmskps %xmm0, %eax # sched: [1:1.00]
@@ -1976,36 +3197,71 @@ define void @test_movntps(<4 x float> %a
 ; SLM-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movntps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movntps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovntps %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movntps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movntps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movntps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movntps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movntps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movntps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movntps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movntps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movntps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movntps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovntps %xmm0, (%rdi) # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movntps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movntps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:0.50]
@@ -2036,6 +3292,13 @@ define void @test_movss_mem(float* %a0,
 ; SLM-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movss_mem:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movss_mem:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
@@ -2043,6 +3306,13 @@ define void @test_movss_mem(float* %a0,
 ; SANDY-NEXT:    vmovss %xmm0, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movss_mem:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movss_mem:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
@@ -2050,6 +3320,13 @@ define void @test_movss_mem(float* %a0,
 ; HASWELL-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movss_mem:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movss_mem:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
@@ -2057,6 +3334,13 @@ define void @test_movss_mem(float* %a0,
 ; BROADWELL-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movss_mem:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movss_mem:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
@@ -2064,6 +3348,13 @@ define void @test_movss_mem(float* %a0,
 ; SKYLAKE-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movss_mem:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movss_mem:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
@@ -2071,6 +3362,13 @@ define void @test_movss_mem(float* %a0,
 ; SKX-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movss_mem:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movss_mem:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2078,6 +3376,13 @@ define void @test_movss_mem(float* %a0,
 ; BTVER2-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movss_mem:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movss_mem:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2109,39 +3414,74 @@ define <4 x float> @test_movss_reg(<4 x
 ;
 ; SLM-LABEL: test_movss_reg:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; SLM-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movss_reg:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movss_reg:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movss_reg:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movss_reg:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movss_reg:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movss_reg:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movss_reg:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movss_reg:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movss_reg:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movss_reg:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
+; SKX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movss_reg:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movss_reg:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movss_reg:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movss_reg:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
@@ -2172,6 +3512,13 @@ define void @test_movups(<4 x float> *%a
 ; SLM-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movups:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movups:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -2179,6 +3526,13 @@ define void @test_movups(<4 x float> *%a
 ; SANDY-NEXT:    vmovups %xmm0, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movups:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movups:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -2186,6 +3540,13 @@ define void @test_movups(<4 x float> *%a
 ; HASWELL-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movups:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movups:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovups (%rdi), %xmm0 # sched: [5:0.50]
@@ -2193,6 +3554,13 @@ define void @test_movups(<4 x float> *%a
 ; BROADWELL-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movups:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movups:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -2200,6 +3568,13 @@ define void @test_movups(<4 x float> *%a
 ; SKYLAKE-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movups:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movups:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -2207,6 +3582,13 @@ define void @test_movups(<4 x float> *%a
 ; SKX-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movups:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movups:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovups (%rdi), %xmm0 # sched: [5:1.00]
@@ -2214,6 +3596,13 @@ define void @test_movups(<4 x float> *%a
 ; BTVER2-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movups:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movups:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovups (%rdi), %xmm0 # sched: [8:0.50]
@@ -2245,42 +3634,84 @@ define <4 x float> @test_mulps(<4 x floa
 ; SLM-NEXT:    mulps (%rdi), %xmm0 # sched: [8:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_mulps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mulps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_mulps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [11:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_mulps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
 ; HASWELL-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_mulps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_mulps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
 ; BROADWELL-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_mulps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_mulps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_mulps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_mulps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_mulps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_mulps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_mulps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_mulps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -2311,42 +3742,84 @@ define float @test_mulss(float %a0, floa
 ; SLM-NEXT:    mulss (%rdi), %xmm0 # sched: [8:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_mulss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mulss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_mulss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [10:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_mulss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
 ; HASWELL-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_mulss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_mulss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
 ; BROADWELL-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_mulss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_mulss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_mulss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_mulss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_mulss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_mulss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_mulss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_mulss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -2381,42 +3854,84 @@ define <4 x float> @test_orps(<4 x float
 ; SLM-NEXT:    orps (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_orps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_orps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_orps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_orps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_orps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_orps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_orps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_orps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_orps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_orps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_orps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_orps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_orps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_orps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -2463,6 +3978,16 @@ define void @test_prefetch(i8* %a0) opts
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_prefetch:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    #APP
+; SANDY-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
+; SANDY-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
+; SANDY-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
+; SANDY-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
+; SANDY-SSE-NEXT:    #NO_APP
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_prefetch:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
@@ -2473,6 +3998,16 @@ define void @test_prefetch(i8* %a0) opts
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_prefetch:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    #APP
+; HASWELL-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    #NO_APP
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_prefetch:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
@@ -2483,6 +4018,16 @@ define void @test_prefetch(i8* %a0) opts
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_prefetch:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    #APP
+; BROADWELL-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    #NO_APP
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_prefetch:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
@@ -2493,6 +4038,16 @@ define void @test_prefetch(i8* %a0) opts
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_prefetch:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    #APP
+; SKYLAKE-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    #NO_APP
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_prefetch:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
@@ -2503,6 +4058,16 @@ define void @test_prefetch(i8* %a0) opts
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_prefetch:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    #APP
+; SKX-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
+; SKX-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
+; SKX-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
+; SKX-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
+; SKX-SSE-NEXT:    #NO_APP
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_prefetch:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
@@ -2513,6 +4078,16 @@ define void @test_prefetch(i8* %a0) opts
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_prefetch:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    #APP
+; BTVER2-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    #NO_APP
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_prefetch:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
@@ -2523,6 +4098,16 @@ define void @test_prefetch(i8* %a0) opts
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_prefetch:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    #APP
+; ZNVER1-SSE-NEXT:    prefetchnta (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    prefetcht0 (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    prefetcht1 (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    prefetcht2 (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    #NO_APP
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_prefetch:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
@@ -2560,6 +4145,13 @@ define <4 x float> @test_rcpps(<4 x floa
 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_rcpps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_rcpps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2567,6 +4159,13 @@ define <4 x float> @test_rcpps(<4 x floa
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_rcpps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_rcpps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2574,6 +4173,13 @@ define <4 x float> @test_rcpps(<4 x floa
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_rcpps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_rcpps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2581,6 +4187,13 @@ define <4 x float> @test_rcpps(<4 x floa
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_rcpps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [10:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_rcpps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vrcpps %xmm0, %xmm0 # sched: [4:1.00]
@@ -2588,6 +4201,13 @@ define <4 x float> @test_rcpps(<4 x floa
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_rcpps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [10:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_rcpps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vrcpps %xmm0, %xmm0 # sched: [4:1.00]
@@ -2595,6 +4215,13 @@ define <4 x float> @test_rcpps(<4 x floa
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_rcpps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_rcpps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vrcpps (%rdi), %xmm1 # sched: [7:1.00]
@@ -2602,6 +4229,13 @@ define <4 x float> @test_rcpps(<4 x floa
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_rcpps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:0.50]
+; ZNVER1-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [12:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_rcpps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vrcpps (%rdi), %xmm1 # sched: [12:0.50]
@@ -2643,6 +4277,14 @@ define <4 x float> @test_rcpss(float %a0
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_rcpss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_rcpss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2651,6 +4293,14 @@ define <4 x float> @test_rcpss(float %a0
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_rcpss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_rcpss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2659,6 +4309,14 @@ define <4 x float> @test_rcpss(float %a0
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_rcpss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_rcpss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2667,6 +4325,14 @@ define <4 x float> @test_rcpss(float %a0
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_rcpss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_rcpss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2675,6 +4341,14 @@ define <4 x float> @test_rcpss(float %a0
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_rcpss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [4:1.00]
+; SKX-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [4:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_rcpss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2683,6 +4357,14 @@ define <4 x float> @test_rcpss(float %a0
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_rcpss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_rcpss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2691,6 +4373,14 @@ define <4 x float> @test_rcpss(float %a0
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_rcpss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [12:0.50]
+; ZNVER1-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [12:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_rcpss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2732,6 +4422,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_rsqrtps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_rsqrtps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2739,6 +4436,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_rsqrtps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_rsqrtps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2746,6 +4450,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_rsqrtps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_rsqrtps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2753,6 +4464,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_rsqrtps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_rsqrtps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
@@ -2760,6 +4478,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_rsqrtps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_rsqrtps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
@@ -2767,6 +4492,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_rsqrtps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_rsqrtps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
@@ -2774,6 +4506,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_rsqrtps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:0.50]
+; ZNVER1-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [12:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_rsqrtps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
@@ -2815,6 +4554,14 @@ define <4 x float> @test_rsqrtss(float %
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_rsqrtss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_rsqrtss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2823,6 +4570,14 @@ define <4 x float> @test_rsqrtss(float %
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_rsqrtss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_rsqrtss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2831,6 +4586,14 @@ define <4 x float> @test_rsqrtss(float %
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_rsqrtss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_rsqrtss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2839,6 +4602,14 @@ define <4 x float> @test_rsqrtss(float %
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_rsqrtss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_rsqrtss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2847,6 +4618,14 @@ define <4 x float> @test_rsqrtss(float %
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_rsqrtss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
+; SKX-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_rsqrtss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2855,6 +4634,14 @@ define <4 x float> @test_rsqrtss(float %
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_rsqrtss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_rsqrtss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2863,6 +4650,14 @@ define <4 x float> @test_rsqrtss(float %
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_rsqrtss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:0.50]
+; ZNVER1-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_rsqrtss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2902,36 +4697,71 @@ define void @test_sfence() {
 ; SLM-NEXT:    sfence # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_sfence:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    sfence # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_sfence:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    sfence # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_sfence:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    sfence # sched: [2:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_sfence:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    sfence # sched: [2:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_sfence:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    sfence # sched: [2:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_sfence:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    sfence # sched: [2:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_sfence:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    sfence # sched: [2:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_sfence:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    sfence # sched: [2:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_sfence:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    sfence # sched: [2:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_sfence:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    sfence # sched: [2:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_sfence:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    sfence # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_sfence:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    sfence # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_sfence:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    sfence # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_sfence:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    sfence # sched: [1:0.50]
@@ -2963,6 +4793,13 @@ define <4 x float> @test_shufps(<4 x flo
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_shufps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SANDY-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_shufps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2970,6 +4807,13 @@ define <4 x float> @test_shufps(<4 x flo
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_shufps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_shufps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2977,6 +4821,13 @@ define <4 x float> @test_shufps(<4 x flo
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_shufps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_shufps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2984,6 +4835,13 @@ define <4 x float> @test_shufps(<4 x flo
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_shufps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_shufps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2991,6 +4849,13 @@ define <4 x float> @test_shufps(<4 x flo
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_shufps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SKX-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_shufps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2998,6 +4863,13 @@ define <4 x float> @test_shufps(<4 x flo
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_shufps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_shufps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
@@ -3005,6 +4877,13 @@ define <4 x float> @test_shufps(<4 x flo
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_shufps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_shufps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
@@ -3041,6 +4920,13 @@ define <4 x float> @test_sqrtps(<4 x flo
 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_sqrtps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [14:1.00]
+; SANDY-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [20:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_sqrtps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
@@ -3048,6 +4934,13 @@ define <4 x float> @test_sqrtps(<4 x flo
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_sqrtps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [13:1.00]
+; HASWELL-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [19:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_sqrtps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
@@ -3055,6 +4948,13 @@ define <4 x float> @test_sqrtps(<4 x flo
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_sqrtps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [13:1.00]
+; BROADWELL-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [18:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_sqrtps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
@@ -3062,6 +4962,13 @@ define <4 x float> @test_sqrtps(<4 x flo
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_sqrtps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [13:1.00]
+; SKYLAKE-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [19:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_sqrtps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
@@ -3069,6 +4976,13 @@ define <4 x float> @test_sqrtps(<4 x flo
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_sqrtps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [12:1.00]
+; SKX-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [18:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_sqrtps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
@@ -3076,6 +4990,13 @@ define <4 x float> @test_sqrtps(<4 x flo
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_sqrtps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [21:21.00]
+; BTVER2-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_sqrtps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
@@ -3083,6 +5004,13 @@ define <4 x float> @test_sqrtps(<4 x flo
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_sqrtps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [20:1.00]
+; ZNVER1-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_sqrtps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
@@ -3124,6 +5052,14 @@ define <4 x float> @test_sqrtss(<4 x flo
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_sqrtss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [14:1.00]
+; SANDY-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [14:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_sqrtss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
@@ -3132,6 +5068,14 @@ define <4 x float> @test_sqrtss(<4 x flo
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_sqrtss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_sqrtss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
@@ -3140,6 +5084,14 @@ define <4 x float> @test_sqrtss(<4 x flo
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_sqrtss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; BROADWELL-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_sqrtss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
@@ -3148,6 +5100,14 @@ define <4 x float> @test_sqrtss(<4 x flo
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_sqrtss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; SKYLAKE-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_sqrtss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
@@ -3156,6 +5116,14 @@ define <4 x float> @test_sqrtss(<4 x flo
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_sqrtss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [12:1.00]
+; SKX-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
+; SKX-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [12:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_sqrtss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
@@ -3164,6 +5132,14 @@ define <4 x float> @test_sqrtss(<4 x flo
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_sqrtss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [26:21.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_sqrtss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovaps (%rdi), %xmm1 # sched: [5:1.00]
@@ -3172,6 +5148,14 @@ define <4 x float> @test_sqrtss(<4 x flo
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_sqrtss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_sqrtss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovaps (%rdi), %xmm1 # sched: [8:0.50]
@@ -3206,42 +5190,84 @@ define i32 @test_stmxcsr() {
 ; SLM-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_stmxcsr:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_stmxcsr:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; SANDY-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_stmxcsr:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_stmxcsr:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
 ; HASWELL-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_stmxcsr:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_stmxcsr:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
 ; BROADWELL-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_stmxcsr:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_stmxcsr:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_stmxcsr:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; SKX-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_stmxcsr:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
 ; SKX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_stmxcsr:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_stmxcsr:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
 ; BTVER2-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_stmxcsr:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
+; ZNVER1-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_stmxcsr:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
@@ -3274,42 +5300,84 @@ define <4 x float> @test_subps(<4 x floa
 ; SLM-NEXT:    subps (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_subps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_subps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_subps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_subps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_subps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_subps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_subps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_subps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_subps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_subps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_subps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_subps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_subps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_subps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3340,42 +5408,84 @@ define float @test_subss(float %a0, floa
 ; SLM-NEXT:    subss (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_subss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_subss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_subss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_subss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_subss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_subss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_subss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_subss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_subss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_subss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_subss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_subss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_subss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_subss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3430,6 +5540,20 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_ucomiss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_ucomiss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -3444,6 +5568,20 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_ucomiss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_ucomiss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -3458,6 +5596,20 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_ucomiss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_ucomiss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -3472,6 +5624,20 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_ucomiss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_ucomiss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -3486,6 +5652,20 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_ucomiss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_ucomiss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -3500,6 +5680,20 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_ucomiss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_ucomiss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -3514,6 +5708,20 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_ucomiss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_ucomiss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -3557,6 +5765,13 @@ define <4 x float> @test_unpckhps(<4 x f
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_unpckhps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SANDY-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_unpckhps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3564,6 +5779,13 @@ define <4 x float> @test_unpckhps(<4 x f
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_unpckhps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_unpckhps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3571,6 +5793,13 @@ define <4 x float> @test_unpckhps(<4 x f
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_unpckhps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_unpckhps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3578,6 +5807,13 @@ define <4 x float> @test_unpckhps(<4 x f
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_unpckhps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_unpckhps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3585,6 +5821,13 @@ define <4 x float> @test_unpckhps(<4 x f
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_unpckhps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_unpckhps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3592,6 +5835,13 @@ define <4 x float> @test_unpckhps(<4 x f
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_unpckhps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_unpckhps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -3599,6 +5849,13 @@ define <4 x float> @test_unpckhps(<4 x f
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_unpckhps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_unpckhps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -3634,6 +5891,13 @@ define <4 x float> @test_unpcklps(<4 x f
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_unpcklps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_unpcklps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3641,6 +5905,13 @@ define <4 x float> @test_unpcklps(<4 x f
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_unpcklps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_unpcklps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3648,6 +5919,13 @@ define <4 x float> @test_unpcklps(<4 x f
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_unpcklps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_unpcklps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3655,6 +5933,13 @@ define <4 x float> @test_unpcklps(<4 x f
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_unpcklps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_unpcklps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3662,6 +5947,13 @@ define <4 x float> @test_unpcklps(<4 x f
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_unpcklps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_unpcklps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3669,6 +5961,13 @@ define <4 x float> @test_unpcklps(<4 x f
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_unpcklps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_unpcklps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -3676,6 +5975,13 @@ define <4 x float> @test_unpcklps(<4 x f
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_unpcklps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_unpcklps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -3712,42 +6018,84 @@ define <4 x float> @test_xorps(<4 x floa
 ; SLM-NEXT:    xorps (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_xorps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_xorps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_xorps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_xorps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_xorps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_xorps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_xorps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_xorps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_xorps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_xorps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_xorps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_xorps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_xorps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_xorps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3796,6 +6144,14 @@ define <4 x float> @test_fnop() nounwind
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_fnop:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    #APP
+; SANDY-SSE-NEXT:    nop # sched: [1:?]
+; SANDY-SSE-NEXT:    #NO_APP
+; SANDY-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_fnop:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
@@ -3804,6 +6160,14 @@ define <4 x float> @test_fnop() nounwind
 ; SANDY-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_fnop:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    #APP
+; HASWELL-SSE-NEXT:    nop # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    #NO_APP
+; HASWELL-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_fnop:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
@@ -3812,6 +6176,14 @@ define <4 x float> @test_fnop() nounwind
 ; HASWELL-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_fnop:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    #APP
+; BROADWELL-SSE-NEXT:    nop # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    #NO_APP
+; BROADWELL-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_fnop:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
@@ -3820,6 +6192,14 @@ define <4 x float> @test_fnop() nounwind
 ; BROADWELL-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_fnop:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    #APP
+; SKYLAKE-SSE-NEXT:    nop # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    #NO_APP
+; SKYLAKE-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_fnop:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
@@ -3828,6 +6208,14 @@ define <4 x float> @test_fnop() nounwind
 ; SKYLAKE-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_fnop:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    #APP
+; SKX-SSE-NEXT:    nop # sched: [1:0.25]
+; SKX-SSE-NEXT:    #NO_APP
+; SKX-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_fnop:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
@@ -3836,6 +6224,14 @@ define <4 x float> @test_fnop() nounwind
 ; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_fnop:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    #APP
+; BTVER2-SSE-NEXT:    nop # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    #NO_APP
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_fnop:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -3844,6 +6240,14 @@ define <4 x float> @test_fnop() nounwind
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_fnop:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    #APP
+; ZNVER1-SSE-NEXT:    nop # sched: [1:?]
+; ZNVER1-SSE-NEXT:    #NO_APP
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_fnop:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.25]

Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,15 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
 
 define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_addpd:
@@ -30,42 +38,84 @@ define <2 x double> @test_addpd(<2 x dou
 ; SLM-NEXT:    addpd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_addpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_addpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_addpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_addpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_addpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_addpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_addpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_addpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_addpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_addpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_addpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_addpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_addpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -96,42 +146,84 @@ define double @test_addsd(double %a0, do
 ; SLM-NEXT:    addsd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_addsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_addsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_addsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_addsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_addsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_addsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_addsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_addsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_addsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_addsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_addsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_addsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_addsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -165,6 +257,13 @@ define <2 x double> @test_andpd(<2 x dou
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_andpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_andpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -172,6 +271,13 @@ define <2 x double> @test_andpd(<2 x dou
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_andpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_andpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -179,6 +285,13 @@ define <2 x double> @test_andpd(<2 x dou
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_andpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_andpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -186,6 +299,13 @@ define <2 x double> @test_andpd(<2 x dou
 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_andpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_andpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -193,6 +313,13 @@ define <2 x double> @test_andpd(<2 x dou
 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_andpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_andpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -200,6 +327,13 @@ define <2 x double> @test_andpd(<2 x dou
 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_andpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_andpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -207,6 +341,13 @@ define <2 x double> @test_andpd(<2 x dou
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_andpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_andpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -246,6 +387,13 @@ define <2 x double> @test_andnotpd(<2 x
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_andnotpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_andnotpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -253,6 +401,13 @@ define <2 x double> @test_andnotpd(<2 x
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_andnotpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_andnotpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -260,6 +415,13 @@ define <2 x double> @test_andnotpd(<2 x
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_andnotpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_andnotpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -267,6 +429,13 @@ define <2 x double> @test_andnotpd(<2 x
 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_andnotpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_andnotpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -274,6 +443,13 @@ define <2 x double> @test_andnotpd(<2 x
 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_andnotpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_andnotpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -281,6 +457,13 @@ define <2 x double> @test_andnotpd(<2 x
 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_andnotpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_andnotpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -288,6 +471,13 @@ define <2 x double> @test_andnotpd(<2 x
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_andnotpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_andnotpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -329,36 +519,71 @@ define void @test_clflush(i8* %p){
 ; SLM-NEXT:    clflush (%rdi) # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_clflush:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    clflush (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_clflush:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    clflush (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_clflush:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_clflush:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    clflush (%rdi) # sched: [2:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_clflush:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_clflush:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    clflush (%rdi) # sched: [2:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_clflush:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_clflush:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    clflush (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_clflush:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_clflush:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    clflush (%rdi) # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_clflush:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    clflush (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_clflush:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    clflush (%rdi) # sched: [5:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_clflush:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    clflush (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_clflush:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    clflush (%rdi) # sched: [8:0.50]
@@ -390,6 +615,13 @@ define <2 x double> @test_cmppd(<2 x dou
 ; SLM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cmppd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cmppd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -397,6 +629,13 @@ define <2 x double> @test_cmppd(<2 x dou
 ; SANDY-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cmppd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cmppd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -404,6 +643,13 @@ define <2 x double> @test_cmppd(<2 x dou
 ; HASWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cmppd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cmppd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -411,6 +657,13 @@ define <2 x double> @test_cmppd(<2 x dou
 ; BROADWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cmppd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cmppd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
@@ -418,14 +671,27 @@ define <2 x double> @test_cmppd(<2 x dou
 ; SKYLAKE-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cmppd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cmppd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; SKX-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cmppd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cmppd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
@@ -433,6 +699,13 @@ define <2 x double> @test_cmppd(<2 x dou
 ; BTVER2-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cmppd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cmppd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -467,42 +740,84 @@ define double @test_cmpsd(double %a0, do
 ; SLM-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cmpsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cmpsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cmpsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cmpsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cmpsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cmpsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cmpsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cmpsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cmpsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cmpsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cmpsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cmpsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cmpsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cmpsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -562,6 +877,20 @@ define i32 @test_comisd(<2 x double> %a0
 ; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_comisd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_comisd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -576,6 +905,20 @@ define i32 @test_comisd(<2 x double> %a0
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_comisd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_comisd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -590,6 +933,20 @@ define i32 @test_comisd(<2 x double> %a0
 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_comisd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_comisd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -604,6 +961,20 @@ define i32 @test_comisd(<2 x double> %a0
 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_comisd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_comisd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -618,6 +989,20 @@ define i32 @test_comisd(<2 x double> %a0
 ; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_comisd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_comisd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -632,6 +1017,20 @@ define i32 @test_comisd(<2 x double> %a0
 ; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_comisd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_comisd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -646,6 +1045,20 @@ define i32 @test_comisd(<2 x double> %a0
 ; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_comisd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_comisd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -689,6 +1102,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtdq2pd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtdq2pd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
@@ -696,6 +1116,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtdq2pd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtdq2pd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
@@ -703,6 +1130,14 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtdq2pd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtdq2pd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
@@ -710,6 +1145,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtdq2pd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtdq2pd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -717,6 +1159,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtdq2pd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtdq2pd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -724,6 +1173,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtdq2pd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtdq2pd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
@@ -731,6 +1187,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtdq2pd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtdq2pd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00]
@@ -769,6 +1232,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtdq2ps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtdq2ps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -776,6 +1246,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtdq2ps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtdq2ps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -783,6 +1260,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtdq2ps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtdq2ps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -790,6 +1274,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtdq2ps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtdq2ps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
@@ -797,6 +1288,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtdq2ps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtdq2ps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
@@ -804,6 +1302,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtdq2ps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtdq2ps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
@@ -811,6 +1316,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtdq2ps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtdq2ps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00]
@@ -847,6 +1359,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtpd2dq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtpd2dq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -854,6 +1373,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtpd2dq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtpd2dq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -861,6 +1387,14 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtpd2dq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtpd2dq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -868,6 +1402,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtpd2dq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtpd2dq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -875,6 +1416,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtpd2dq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtpd2dq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -882,6 +1430,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtpd2dq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtpd2dq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
@@ -889,6 +1444,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtpd2dq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtpd2dq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
@@ -926,6 +1488,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtpd2ps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtpd2ps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -933,6 +1502,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtpd2ps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtpd2ps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -940,6 +1516,14 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtpd2ps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtpd2ps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -947,6 +1531,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtpd2ps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtpd2ps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
@@ -954,6 +1545,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtpd2ps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtpd2ps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
@@ -961,6 +1559,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtpd2ps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtpd2ps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
@@ -968,6 +1573,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtpd2ps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtpd2ps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00]
@@ -1005,6 +1617,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtps2dq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtps2dq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1012,6 +1631,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtps2dq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtps2dq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1019,6 +1645,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtps2dq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtps2dq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1026,6 +1659,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtps2dq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtps2dq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
@@ -1033,6 +1673,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtps2dq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtps2dq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33]
@@ -1040,6 +1687,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtps2dq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtps2dq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
@@ -1047,6 +1701,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtps2dq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtps2dq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00]
@@ -1084,6 +1745,13 @@ define <2 x double> @test_cvtps2pd(<4 x
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtps2pd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtps2pd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1091,6 +1759,13 @@ define <2 x double> @test_cvtps2pd(<4 x
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtps2pd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtps2pd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1098,6 +1773,13 @@ define <2 x double> @test_cvtps2pd(<4 x
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtps2pd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtps2pd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1105,6 +1787,13 @@ define <2 x double> @test_cvtps2pd(<4 x
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtps2pd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtps2pd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -1112,6 +1801,13 @@ define <2 x double> @test_cvtps2pd(<4 x
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtps2pd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtps2pd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -1119,6 +1815,13 @@ define <2 x double> @test_cvtps2pd(<4 x
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtps2pd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtps2pd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00]
@@ -1126,6 +1829,13 @@ define <2 x double> @test_cvtps2pd(<4 x
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtps2pd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtps2pd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00]
@@ -1163,6 +1873,13 @@ define i32 @test_cvtsd2si(double %a0, do
 ; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtsd2si:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtsd2si:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
@@ -1170,6 +1887,13 @@ define i32 @test_cvtsd2si(double %a0, do
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtsd2si:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtsd2si:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
@@ -1177,6 +1901,13 @@ define i32 @test_cvtsd2si(double %a0, do
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtsd2si:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtsd2si:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtsd2si (%rdi), %eax # sched: [9:1.00]
@@ -1184,6 +1915,13 @@ define i32 @test_cvtsd2si(double %a0, do
 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtsd2si:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtsd2si:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1191,6 +1929,13 @@ define i32 @test_cvtsd2si(double %a0, do
 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtsd2si:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtsd2si:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1198,6 +1943,13 @@ define i32 @test_cvtsd2si(double %a0, do
 ; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtsd2si:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtsd2si:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtsd2si (%rdi), %eax # sched: [8:1.00]
@@ -1205,6 +1957,13 @@ define i32 @test_cvtsd2si(double %a0, do
 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtsd2si:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtsd2si:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtsd2si (%rdi), %eax # sched: [12:1.00]
@@ -1243,6 +2002,13 @@ define i64 @test_cvtsd2siq(double %a0, d
 ; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtsd2siq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtsd2siq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
@@ -1250,6 +2016,13 @@ define i64 @test_cvtsd2siq(double %a0, d
 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtsd2siq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtsd2siq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
@@ -1257,6 +2030,13 @@ define i64 @test_cvtsd2siq(double %a0, d
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtsd2siq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtsd2siq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtsd2si (%rdi), %rax # sched: [9:1.00]
@@ -1264,6 +2044,13 @@ define i64 @test_cvtsd2siq(double %a0, d
 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtsd2siq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtsd2siq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1271,6 +2058,13 @@ define i64 @test_cvtsd2siq(double %a0, d
 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtsd2siq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [11:1.00]
+; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtsd2siq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1278,6 +2072,13 @@ define i64 @test_cvtsd2siq(double %a0, d
 ; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtsd2siq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtsd2siq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtsd2si (%rdi), %rax # sched: [8:1.00]
@@ -1285,6 +2086,13 @@ define i64 @test_cvtsd2siq(double %a0, d
 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtsd2siq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtsd2siq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtsd2si (%rdi), %rax # sched: [12:1.00]
@@ -1327,6 +2135,14 @@ define float @test_cvtsd2ss(double %a0,
 ; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtsd2ss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtsd2ss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1335,6 +2151,14 @@ define float @test_cvtsd2ss(double %a0,
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtsd2ss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtsd2ss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1343,6 +2167,14 @@ define float @test_cvtsd2ss(double %a0,
 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtsd2ss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtsd2ss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1351,6 +2183,14 @@ define float @test_cvtsd2ss(double %a0,
 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtsd2ss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtsd2ss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1359,6 +2199,14 @@ define float @test_cvtsd2ss(double %a0,
 ; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtsd2ss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtsd2ss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1367,6 +2215,14 @@ define float @test_cvtsd2ss(double %a0,
 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtsd2ss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtsd2ss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
@@ -1375,6 +2231,14 @@ define float @test_cvtsd2ss(double %a0,
 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtsd2ss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtsd2ss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
@@ -1411,6 +2275,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtsi2sd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtsi2sd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1418,6 +2289,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtsi2sd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtsi2sd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1425,6 +2303,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtsi2sd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtsi2sd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1432,6 +2317,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtsi2sd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtsi2sd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1439,6 +2331,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtsi2sd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtsi2sd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1446,6 +2345,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtsi2sd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtsi2sd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1453,6 +2359,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtsi2sd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtsi2sd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1488,6 +2401,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtsi2sdq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtsi2sdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1495,6 +2415,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtsi2sdq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtsi2sdq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1502,6 +2429,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtsi2sdq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtsi2sdq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1509,6 +2443,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtsi2sdq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtsi2sdq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1516,6 +2457,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtsi2sdq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtsi2sdq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1523,6 +2471,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtsi2sdq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtsi2sdq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1530,6 +2485,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtsi2sdq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtsi2sdq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1571,6 +2533,14 @@ define double @test_cvtss2sd(float %a0,
 ; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvtss2sd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtss2sd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
@@ -1579,6 +2549,14 @@ define double @test_cvtss2sd(float %a0,
 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvtss2sd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvtss2sd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1587,6 +2565,14 @@ define double @test_cvtss2sd(float %a0,
 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvtss2sd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvtss2sd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1595,6 +2581,14 @@ define double @test_cvtss2sd(float %a0,
 ; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvtss2sd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvtss2sd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1603,6 +2597,14 @@ define double @test_cvtss2sd(float %a0,
 ; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvtss2sd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvtss2sd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1611,6 +2613,14 @@ define double @test_cvtss2sd(float %a0,
 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvtss2sd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvtss2sd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -1619,6 +2629,14 @@ define double @test_cvtss2sd(float %a0,
 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvtss2sd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvtss2sd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -1656,6 +2674,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvttpd2dq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvttpd2dq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1663,6 +2688,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvttpd2dq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvttpd2dq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1670,6 +2702,14 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvttpd2dq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvttpd2dq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1677,6 +2717,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvttpd2dq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvttpd2dq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -1684,6 +2731,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvttpd2dq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvttpd2dq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -1691,6 +2745,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvttpd2dq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvttpd2dq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
@@ -1698,6 +2759,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvttpd2dq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvttpd2dq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
@@ -1736,6 +2804,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvttps2dq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvttps2dq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1743,6 +2818,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvttps2dq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvttps2dq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1750,6 +2832,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvttps2dq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvttps2dq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1757,6 +2846,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvttps2dq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvttps2dq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
@@ -1764,6 +2860,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvttps2dq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvttps2dq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33]
@@ -1771,6 +2874,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvttps2dq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvttps2dq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
@@ -1778,6 +2888,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvttps2dq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvttps2dq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00]
@@ -1813,6 +2930,13 @@ define i32 @test_cvttsd2si(double %a0, d
 ; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvttsd2si:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvttsd2si:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
@@ -1820,6 +2944,13 @@ define i32 @test_cvttsd2si(double %a0, d
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvttsd2si:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvttsd2si:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
@@ -1827,6 +2958,13 @@ define i32 @test_cvttsd2si(double %a0, d
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvttsd2si:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvttsd2si:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvttsd2si (%rdi), %eax # sched: [9:1.00]
@@ -1834,6 +2972,13 @@ define i32 @test_cvttsd2si(double %a0, d
 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvttsd2si:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvttsd2si:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1841,6 +2986,13 @@ define i32 @test_cvttsd2si(double %a0, d
 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvttsd2si:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvttsd2si:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1848,6 +3000,13 @@ define i32 @test_cvttsd2si(double %a0, d
 ; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvttsd2si:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvttsd2si:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvttsd2si (%rdi), %eax # sched: [8:1.00]
@@ -1855,6 +3014,13 @@ define i32 @test_cvttsd2si(double %a0, d
 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvttsd2si:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvttsd2si:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvttsd2si (%rdi), %eax # sched: [12:1.00]
@@ -1890,6 +3056,13 @@ define i64 @test_cvttsd2siq(double %a0,
 ; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_cvttsd2siq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvttsd2siq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
@@ -1897,6 +3070,13 @@ define i64 @test_cvttsd2siq(double %a0,
 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_cvttsd2siq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_cvttsd2siq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
@@ -1904,6 +3084,13 @@ define i64 @test_cvttsd2siq(double %a0,
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_cvttsd2siq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_cvttsd2siq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvttsd2si (%rdi), %rax # sched: [9:1.00]
@@ -1911,6 +3098,13 @@ define i64 @test_cvttsd2siq(double %a0,
 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_cvttsd2siq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_cvttsd2siq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1918,6 +3112,13 @@ define i64 @test_cvttsd2siq(double %a0,
 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_cvttsd2siq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [11:1.00]
+; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_cvttsd2siq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1925,6 +3126,13 @@ define i64 @test_cvttsd2siq(double %a0,
 ; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_cvttsd2siq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_cvttsd2siq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvttsd2si (%rdi), %rax # sched: [8:1.00]
@@ -1932,6 +3140,13 @@ define i64 @test_cvttsd2siq(double %a0,
 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_cvttsd2siq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_cvttsd2siq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vcvttsd2si (%rdi), %rax # sched: [12:1.00]
@@ -1964,42 +3179,84 @@ define <2 x double> @test_divpd(<2 x dou
 ; SLM-NEXT:    divpd (%rdi), %xmm0 # sched: [37:34.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_divpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_divpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00]
 ; SANDY-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_divpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:1.00]
+; HASWELL-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_divpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00]
 ; HASWELL-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_divpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [19:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_divpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
 ; BROADWELL-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_divpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_divpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
 ; SKYLAKE-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_divpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [20:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_divpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
 ; SKX-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_divpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_divpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
 ; BTVER2-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_divpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_divpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -2030,42 +3287,84 @@ define double @test_divsd(double %a0, do
 ; SLM-NEXT:    divsd (%rdi), %xmm0 # sched: [37:34.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_divsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_divsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00]
 ; SANDY-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_divsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:1.00]
+; HASWELL-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_divsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00]
 ; HASWELL-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_divsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_divsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
 ; BROADWELL-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_divsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_divsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
 ; SKYLAKE-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_divsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_divsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
 ; SKX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_divsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_divsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
 ; BTVER2-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_divsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_divsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -2099,36 +3398,71 @@ define void @test_lfence() {
 ; SLM-NEXT:    lfence # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_lfence:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    lfence # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_lfence:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    lfence # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_lfence:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    lfence # sched: [2:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_lfence:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    lfence # sched: [2:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_lfence:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    lfence # sched: [2:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_lfence:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    lfence # sched: [2:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_lfence:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    lfence # sched: [2:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_lfence:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    lfence # sched: [2:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_lfence:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    lfence # sched: [2:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_lfence:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    lfence # sched: [2:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_lfence:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    lfence # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_lfence:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    lfence # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_lfence:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    lfence # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_lfence:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    lfence # sched: [1:0.50]
@@ -2160,36 +3494,71 @@ define void @test_mfence() {
 ; SLM-NEXT:    mfence # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_mfence:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    mfence # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mfence:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    mfence # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_mfence:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    mfence # sched: [2:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_mfence:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    mfence # sched: [2:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_mfence:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    mfence # sched: [2:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_mfence:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    mfence # sched: [2:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_mfence:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    mfence # sched: [3:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_mfence:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    mfence # sched: [3:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_mfence:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    mfence # sched: [3:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_mfence:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    mfence # sched: [3:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_mfence:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    mfence # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_mfence:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    mfence # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_mfence:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    mfence # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_mfence:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    mfence # sched: [1:0.50]
@@ -2219,36 +3588,71 @@ define void @test_maskmovdqu(<16 x i8> %
 ; SLM-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_maskmovdqu:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maskmovdqu:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_maskmovdqu:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_maskmovdqu:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_maskmovdqu:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_maskmovdqu:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_maskmovdqu:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_maskmovdqu:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_maskmovdqu:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_maskmovdqu:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_maskmovdqu:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_maskmovdqu:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_maskmovdqu:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_maskmovdqu:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [100:?]
@@ -2277,42 +3681,84 @@ define <2 x double> @test_maxpd(<2 x dou
 ; SLM-NEXT:    maxpd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_maxpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maxpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_maxpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_maxpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_maxpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_maxpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_maxpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_maxpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_maxpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_maxpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_maxpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_maxpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_maxpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_maxpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2344,42 +3790,84 @@ define <2 x double> @test_maxsd(<2 x dou
 ; SLM-NEXT:    maxsd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_maxsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maxsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_maxsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_maxsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_maxsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_maxsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_maxsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_maxsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_maxsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_maxsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_maxsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_maxsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_maxsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_maxsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2411,42 +3899,84 @@ define <2 x double> @test_minpd(<2 x dou
 ; SLM-NEXT:    minpd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_minpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_minpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_minpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_minpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_minpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_minpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_minpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_minpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_minpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_minpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_minpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_minpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_minpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_minpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2478,42 +4008,84 @@ define <2 x double> @test_minsd(<2 x dou
 ; SLM-NEXT:    minsd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_minsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_minsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_minsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_minsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_minsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_minsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_minsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_minsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_minsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_minsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_minsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_minsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_minsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_minsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2548,6 +4120,13 @@ define void @test_movapd(<2 x double> *%
 ; SLM-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movapd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movapd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2555,6 +4134,13 @@ define void @test_movapd(<2 x double> *%
 ; SANDY-NEXT:    vmovapd %xmm0, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movapd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movapd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2562,6 +4148,13 @@ define void @test_movapd(<2 x double> *%
 ; HASWELL-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movapd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movapd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovapd (%rdi), %xmm0 # sched: [5:0.50]
@@ -2569,6 +4162,13 @@ define void @test_movapd(<2 x double> *%
 ; BROADWELL-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movapd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movapd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2576,6 +4176,13 @@ define void @test_movapd(<2 x double> *%
 ; SKYLAKE-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movapd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movapd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2583,6 +4190,13 @@ define void @test_movapd(<2 x double> *%
 ; SKX-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movapd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movapd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovapd (%rdi), %xmm0 # sched: [5:1.00]
@@ -2590,6 +4204,13 @@ define void @test_movapd(<2 x double> *%
 ; BTVER2-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movapd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movapd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovapd (%rdi), %xmm0 # sched: [8:0.50]
@@ -2624,6 +4245,13 @@ define void @test_movdqa(<2 x i64> *%a0,
 ; SLM-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movdqa:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movdqa:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2631,6 +4259,13 @@ define void @test_movdqa(<2 x i64> *%a0,
 ; SANDY-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movdqa:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movdqa:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2638,6 +4273,13 @@ define void @test_movdqa(<2 x i64> *%a0,
 ; HASWELL-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movdqa:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movdqa:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [5:0.50]
@@ -2645,6 +4287,13 @@ define void @test_movdqa(<2 x i64> *%a0,
 ; BROADWELL-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movdqa:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movdqa:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2652,6 +4301,13 @@ define void @test_movdqa(<2 x i64> *%a0,
 ; SKYLAKE-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movdqa:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movdqa:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2659,6 +4315,13 @@ define void @test_movdqa(<2 x i64> *%a0,
 ; SKX-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movdqa:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movdqa:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
@@ -2666,6 +4329,13 @@ define void @test_movdqa(<2 x i64> *%a0,
 ; BTVER2-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movdqa:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movdqa:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [8:0.50]
@@ -2700,6 +4370,13 @@ define void @test_movdqu(<2 x i64> *%a0,
 ; SLM-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movdqu:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movdqu:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2707,6 +4384,13 @@ define void @test_movdqu(<2 x i64> *%a0,
 ; SANDY-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movdqu:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movdqu:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2714,6 +4398,13 @@ define void @test_movdqu(<2 x i64> *%a0,
 ; HASWELL-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movdqu:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movdqu:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [5:0.50]
@@ -2721,6 +4412,13 @@ define void @test_movdqu(<2 x i64> *%a0,
 ; BROADWELL-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movdqu:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movdqu:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2728,6 +4426,13 @@ define void @test_movdqu(<2 x i64> *%a0,
 ; SKYLAKE-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movdqu:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movdqu:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2735,6 +4440,13 @@ define void @test_movdqu(<2 x i64> *%a0,
 ; SKX-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movdqu:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movdqu:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
@@ -2742,6 +4454,13 @@ define void @test_movdqu(<2 x i64> *%a0,
 ; BTVER2-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movdqu:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movdqu:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [8:0.50]
@@ -2785,6 +4504,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; SLM-NEXT:    movd %xmm2, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2795,6 +4524,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; SANDY-NEXT:    vmovd %xmm1, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    movd %xmm2, %eax # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2805,6 +4544,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; HASWELL-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movd %xmm2, %eax # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2815,6 +4564,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; BROADWELL-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2825,16 +4584,36 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; SKYLAKE-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT:    vmovd %edi, %xmm2 # sched: [1:1.00]
-; SKX-NEXT:    vpaddd %xmm2, %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
+; SKX-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT:    vmovd %xmm2, (%rsi) # sched: [1:1.00]
+; SKX-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movd %xmm2, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2845,6 +4624,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; BTVER2-NEXT:    vmovd %xmm0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    movd %edi, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2896,6 +4685,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; SLM-NEXT:    movq %xmm2, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movd_64:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
+; SANDY-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movd_64:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2906,6 +4705,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; SANDY-NEXT:    vmovq %xmm1, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movd_64:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    movq %xmm2, %rax # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movd_64:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2916,6 +4725,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; HASWELL-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movd_64:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movq %xmm2, %rax # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movd_64:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2926,6 +4745,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; BROADWELL-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movd_64:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movd_64:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2936,16 +4765,36 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; SKYLAKE-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movd_64:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
+; SKX-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movd_64:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT:    vmovq %rdi, %xmm2 # sched: [1:1.00]
-; SKX-NEXT:    vpaddq %xmm2, %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
+; SKX-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
-; SKX-NEXT:    vmovq %xmm2, (%rsi) # sched: [1:1.00]
+; SKX-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movd_64:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movq %xmm2, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movd_64:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
@@ -2956,6 +4805,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; BTVER2-NEXT:    vmovq %xmm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movd_64:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movq %xmm2, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movd_64:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
@@ -2998,6 +4857,13 @@ define void @test_movhpd(<2 x double> %a
 ; SLM-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movhpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movhpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
@@ -3005,6 +4871,13 @@ define void @test_movhpd(<2 x double> %a
 ; SANDY-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movhpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movhpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3012,6 +4885,13 @@ define void @test_movhpd(<2 x double> %a
 ; HASWELL-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movhpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movhpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3019,6 +4899,13 @@ define void @test_movhpd(<2 x double> %a
 ; BROADWELL-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movhpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movhpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3026,6 +4913,13 @@ define void @test_movhpd(<2 x double> %a
 ; SKYLAKE-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movhpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKX-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movhpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3033,6 +4927,13 @@ define void @test_movhpd(<2 x double> %a
 ; SKX-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movhpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movhpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3040,6 +4941,13 @@ define void @test_movhpd(<2 x double> %a
 ; BTVER2-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movhpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movhpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
@@ -3077,6 +4985,13 @@ define void @test_movlpd(<2 x double> %a
 ; SLM-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movlpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movlpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
@@ -3084,6 +4999,13 @@ define void @test_movlpd(<2 x double> %a
 ; SANDY-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movlpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movlpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3091,6 +5013,13 @@ define void @test_movlpd(<2 x double> %a
 ; HASWELL-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movlpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movlpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3098,6 +5027,13 @@ define void @test_movlpd(<2 x double> %a
 ; BROADWELL-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movlpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movlpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3105,6 +5041,13 @@ define void @test_movlpd(<2 x double> %a
 ; SKYLAKE-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movlpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKX-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movlpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3112,6 +5055,13 @@ define void @test_movlpd(<2 x double> %a
 ; SKX-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movlpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movlpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3119,6 +5069,13 @@ define void @test_movlpd(<2 x double> %a
 ; BTVER2-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movlpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movlpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
@@ -3152,36 +5109,71 @@ define i32 @test_movmskpd(<2 x double> %
 ; SLM-NEXT:    movmskpd %xmm0, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movmskpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movmskpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movmskpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movmskpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movmskpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movmskpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movmskpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movmskpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movmskpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movmskpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movmskpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movmskpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movmskpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movmskpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovmskpd %xmm0, %eax # sched: [1:1.00]
@@ -3212,42 +5204,84 @@ define void @test_movntdqa(<2 x i64> %a0
 ; SLM-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movntdqa:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movntdqa:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movntdqa:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movntdqa:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movntdqa:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movntdqa:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movntdqa:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movntdqa:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movntdqa:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movntdqa:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movntdqa:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movntdqa:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movntdqa:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movntdqa:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3277,42 +5311,84 @@ define void @test_movntpd(<2 x double> %
 ; SLM-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movntpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movntpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movntpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movntpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movntpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movntpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movntpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movntpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movntpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movntpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movntpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movntpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movntpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movntpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3345,6 +5421,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ; SLM-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movq_mem:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movq_mem:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
@@ -3352,6 +5435,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ; SANDY-NEXT:    vmovq %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movq_mem:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movq_mem:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3359,6 +5449,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ; HASWELL-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movq_mem:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movq_mem:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3366,6 +5463,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ; BROADWELL-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movq_mem:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movq_mem:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3373,6 +5477,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ; SKYLAKE-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movq_mem:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movq_mem:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3380,6 +5491,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ; SKX-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movq_mem:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movq_mem:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
@@ -3387,6 +5505,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ; BTVER2-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movq_mem:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movq_mem:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
@@ -3422,42 +5547,84 @@ define <2 x i64> @test_movq_reg(<2 x i64
 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movq_reg:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:1.00]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movq_reg:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
 ; SANDY-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movq_reg:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movq_reg:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movq_reg:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movq_reg:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
 ; BROADWELL-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movq_reg:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movq_reg:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movq_reg:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movq_reg:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
 ; SKX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movq_reg:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movq_reg:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movq_reg:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movq_reg:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
@@ -3490,6 +5657,13 @@ define void @test_movsd_mem(double* %a0,
 ; SLM-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movsd_mem:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movsd_mem:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
@@ -3497,6 +5671,13 @@ define void @test_movsd_mem(double* %a0,
 ; SANDY-NEXT:    vmovsd %xmm0, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movsd_mem:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movsd_mem:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3504,6 +5685,13 @@ define void @test_movsd_mem(double* %a0,
 ; HASWELL-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movsd_mem:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movsd_mem:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3511,6 +5699,13 @@ define void @test_movsd_mem(double* %a0,
 ; BROADWELL-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movsd_mem:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movsd_mem:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3518,6 +5713,13 @@ define void @test_movsd_mem(double* %a0,
 ; SKYLAKE-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movsd_mem:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movsd_mem:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3525,6 +5727,13 @@ define void @test_movsd_mem(double* %a0,
 ; SKX-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movsd_mem:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movsd_mem:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
@@ -3532,6 +5741,13 @@ define void @test_movsd_mem(double* %a0,
 ; BTVER2-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movsd_mem:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movsd_mem:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
@@ -3567,36 +5783,78 @@ define <2 x double> @test_movsd_reg(<2 x
 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movsd_reg:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SANDY-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movsd_reg:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movsd_reg:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movsd_reg:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movsd_reg:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movsd_reg:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movsd_reg:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movsd_reg:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movsd_reg:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKX-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movsd_reg:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movsd_reg:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movsd_reg:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movsd_reg:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movsd_reg:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
@@ -3627,6 +5885,13 @@ define void @test_movupd(<2 x double> *%
 ; SLM-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movupd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movupd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3634,6 +5899,13 @@ define void @test_movupd(<2 x double> *%
 ; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movupd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movupd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3641,6 +5913,13 @@ define void @test_movupd(<2 x double> *%
 ; HASWELL-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movupd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movupd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovupd (%rdi), %xmm0 # sched: [5:0.50]
@@ -3648,6 +5927,13 @@ define void @test_movupd(<2 x double> *%
 ; BROADWELL-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movupd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movupd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3655,6 +5941,13 @@ define void @test_movupd(<2 x double> *%
 ; SKYLAKE-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movupd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movupd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3662,6 +5955,13 @@ define void @test_movupd(<2 x double> *%
 ; SKX-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movupd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movupd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovupd (%rdi), %xmm0 # sched: [5:1.00]
@@ -3669,6 +5969,13 @@ define void @test_movupd(<2 x double> *%
 ; BTVER2-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movupd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movupd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovupd (%rdi), %xmm0 # sched: [8:0.50]
@@ -3700,42 +6007,84 @@ define <2 x double> @test_mulpd(<2 x dou
 ; SLM-NEXT:    mulpd (%rdi), %xmm0 # sched: [8:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_mulpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mulpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_mulpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_mulpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
 ; HASWELL-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_mulpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_mulpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
 ; BROADWELL-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_mulpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_mulpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_mulpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_mulpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_mulpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:2.00]
+; BTVER2-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [9:2.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_mulpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
 ; BTVER2-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_mulpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_mulpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -3766,42 +6115,84 @@ define double @test_mulsd(double %a0, do
 ; SLM-NEXT:    mulsd (%rdi), %xmm0 # sched: [8:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_mulsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mulsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_mulsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [10:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_mulsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
 ; HASWELL-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_mulsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_mulsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
 ; BROADWELL-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_mulsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_mulsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_mulsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_mulsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_mulsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:2.00]
+; BTVER2-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:2.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_mulsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
 ; BTVER2-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_mulsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_mulsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -3835,6 +6226,13 @@ define <2 x double> @test_orpd(<2 x doub
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_orpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_orpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3842,6 +6240,13 @@ define <2 x double> @test_orpd(<2 x doub
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_orpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_orpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3849,6 +6254,13 @@ define <2 x double> @test_orpd(<2 x doub
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_orpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_orpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3856,6 +6268,13 @@ define <2 x double> @test_orpd(<2 x doub
 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_orpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_orpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -3863,6 +6282,13 @@ define <2 x double> @test_orpd(<2 x doub
 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_orpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_orpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -3870,6 +6296,13 @@ define <2 x double> @test_orpd(<2 x doub
 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_orpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_orpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -3877,6 +6310,13 @@ define <2 x double> @test_orpd(<2 x doub
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_orpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_orpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3917,42 +6357,84 @@ define <8 x i16> @test_packssdw(<4 x i32
 ; SLM-NEXT:    packssdw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_packssdw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_packssdw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_packssdw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_packssdw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_packssdw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_packssdw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_packssdw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_packssdw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_packssdw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_packssdw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKX-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_packssdw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_packssdw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_packssdw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_packssdw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3989,42 +6471,84 @@ define <16 x i8> @test_packsswb(<8 x i16
 ; SLM-NEXT:    packsswb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_packsswb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_packsswb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_packsswb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_packsswb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_packsswb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_packsswb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_packsswb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_packsswb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_packsswb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_packsswb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKX-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_packsswb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_packsswb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_packsswb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_packsswb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4061,42 +6585,84 @@ define <16 x i8> @test_packuswb(<8 x i16
 ; SLM-NEXT:    packuswb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_packuswb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_packuswb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_packuswb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_packuswb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_packuswb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_packuswb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_packuswb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_packuswb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_packuswb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_packuswb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKX-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_packuswb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_packuswb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_packuswb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_packuswb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4133,42 +6699,84 @@ define <16 x i8> @test_paddb(<16 x i8> %
 ; SLM-NEXT:    paddb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_paddb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_paddb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_paddb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_paddb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_paddb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_paddb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_paddb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_paddb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_paddb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_paddb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_paddb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_paddb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_paddb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_paddb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4203,42 +6811,84 @@ define <4 x i32> @test_paddd(<4 x i32> %
 ; SLM-NEXT:    paddd (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_paddd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_paddd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_paddd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_paddd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_paddd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_paddd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_paddd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_paddd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_paddd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_paddd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_paddd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_paddd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_paddd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_paddd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4269,42 +6919,84 @@ define <2 x i64> @test_paddq(<2 x i64> %
 ; SLM-NEXT:    paddq (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_paddq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_paddq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_paddq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_paddq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_paddq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_paddq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_paddq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_paddq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_paddq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_paddq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_paddq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_paddq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_paddq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_paddq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4339,42 +7031,84 @@ define <16 x i8> @test_paddsb(<16 x i8>
 ; SLM-NEXT:    paddsb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_paddsb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_paddsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_paddsb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_paddsb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_paddsb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_paddsb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_paddsb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_paddsb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_paddsb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_paddsb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_paddsb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_paddsb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_paddsb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_paddsb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4410,42 +7144,84 @@ define <8 x i16> @test_paddsw(<8 x i16>
 ; SLM-NEXT:    paddsw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_paddsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_paddsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_paddsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_paddsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_paddsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_paddsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_paddsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_paddsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_paddsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_paddsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_paddsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_paddsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_paddsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_paddsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4481,42 +7257,84 @@ define <16 x i8> @test_paddusb(<16 x i8>
 ; SLM-NEXT:    paddusb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_paddusb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_paddusb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_paddusb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_paddusb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_paddusb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_paddusb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_paddusb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_paddusb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_paddusb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_paddusb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_paddusb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_paddusb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_paddusb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_paddusb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4552,42 +7370,84 @@ define <8 x i16> @test_paddusw(<8 x i16>
 ; SLM-NEXT:    paddusw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_paddusw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_paddusw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_paddusw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_paddusw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_paddusw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_paddusw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_paddusw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_paddusw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_paddusw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_paddusw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_paddusw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_paddusw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_paddusw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_paddusw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4623,42 +7483,84 @@ define <8 x i16> @test_paddw(<8 x i16> %
 ; SLM-NEXT:    paddw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_paddw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_paddw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_paddw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_paddw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_paddw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_paddw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_paddw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_paddw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_paddw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_paddw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_paddw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_paddw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_paddw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_paddw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4692,6 +7594,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pand:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pand:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4699,6 +7608,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pand:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pand:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4706,6 +7622,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pand:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pand:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4713,6 +7636,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pand:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pand:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4720,6 +7650,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pand:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pand:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4727,6 +7664,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pand:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pand:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -4734,6 +7678,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pand:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pand:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4775,6 +7726,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pandn:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pandn:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4782,6 +7742,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pandn:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pandn:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4789,6 +7758,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pandn:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pandn:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4796,6 +7774,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pandn:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pandn:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4803,6 +7790,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pandn:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SKX-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pandn:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4810,6 +7806,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pandn:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pandn:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -4817,6 +7822,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pandn:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pandn:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4855,42 +7869,84 @@ define <16 x i8> @test_pavgb(<16 x i8> %
 ; SLM-NEXT:    pavgb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pavgb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pavgb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pavgb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pavgb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pavgb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pavgb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pavgb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pavgb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pavgb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pavgb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pavgb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pavgb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pavgb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pavgb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4935,42 +7991,84 @@ define <8 x i16> @test_pavgw(<8 x i16> %
 ; SLM-NEXT:    pavgw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pavgw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pavgw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pavgw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pavgw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pavgw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pavgw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pavgw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pavgw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pavgw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pavgw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pavgw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pavgw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pavgw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pavgw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5016,6 +8114,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpeqb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpeqb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5023,6 +8128,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpeqb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpeqb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5030,6 +8142,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpeqb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpeqb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5037,6 +8156,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpeqb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpeqb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5044,14 +8170,27 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpeqb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpeqb:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpeqb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpeqb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5059,6 +8198,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpeqb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpeqb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5097,6 +8243,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpeqd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpeqd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5104,6 +8257,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpeqd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpeqd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5111,6 +8271,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpeqd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpeqd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5118,6 +8285,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpeqd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpeqd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5125,14 +8299,27 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpeqd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpeqd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpeqd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpeqd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5140,6 +8327,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpeqd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpeqd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5178,6 +8372,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpeqw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpeqw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5185,6 +8386,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpeqw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpeqw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5192,6 +8400,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpeqw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpeqw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5199,6 +8414,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpeqw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpeqw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5206,14 +8428,27 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpeqw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpeqw:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT:    korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpeqw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpeqw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5221,6 +8456,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpeqw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpeqw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5260,6 +8502,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 ; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpgtb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpgtb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5267,6 +8517,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpgtb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpgtb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5274,6 +8532,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpgtb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpgtb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5281,6 +8547,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpgtb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpgtb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5288,14 +8562,29 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpgtb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpgtb:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpcmpgtb (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpgtb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpgtb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5303,6 +8592,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpgtb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpgtb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5342,6 +8639,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 ; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpgtd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpgtd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5349,6 +8654,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpgtd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpgtd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5356,6 +8669,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpgtd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpgtd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5363,6 +8684,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpgtd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpgtd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5370,14 +8699,29 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpgtd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpgtd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpgtd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpgtd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5385,6 +8729,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpgtd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpgtd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5424,6 +8776,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 ; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpgtw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpgtw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5431,6 +8791,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpgtw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpgtw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5438,6 +8806,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpgtw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpgtw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5445,6 +8821,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpgtw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpgtw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5452,14 +8836,29 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpgtw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpgtw:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpcmpgtw (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT:    korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpgtw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpgtw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5467,6 +8866,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpgtw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpgtw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5500,42 +8907,84 @@ define i16 @test_pextrw(<8 x i16> %a0) {
 ; SLM-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pextrw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pextrw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pextrw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pextrw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:1.00]
 ; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pextrw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pextrw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:1.00]
 ; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pextrw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pextrw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
 ; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pextrw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pextrw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pextrw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pextrw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpextrw $6, %xmm0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pextrw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pextrw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:2.00]
@@ -5568,42 +9017,84 @@ define <8 x i16> @test_pinsrw(<8 x i16>
 ; SLM-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pinsrw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pinsrw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
 ; SANDY-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pinsrw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pinsrw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; HASWELL-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pinsrw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pinsrw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; BROADWELL-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pinsrw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pinsrw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; SKYLAKE-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pinsrw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pinsrw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; SKX-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pinsrw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pinsrw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pinsrw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pinsrw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5634,42 +9125,84 @@ define <4 x i32> @test_pmaddwd(<8 x i16>
 ; SLM-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmaddwd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmaddwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmaddwd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmaddwd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmaddwd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmaddwd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmaddwd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmaddwd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmaddwd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmaddwd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmaddwd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmaddwd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmaddwd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmaddwd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -5706,42 +9239,84 @@ define <8 x i16> @test_pmaxsw(<8 x i16>
 ; SLM-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmaxsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmaxsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmaxsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmaxsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmaxsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmaxsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmaxsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmaxsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmaxsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmaxsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmaxsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmaxsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmaxsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmaxsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5777,42 +9352,84 @@ define <16 x i8> @test_pmaxub(<16 x i8>
 ; SLM-NEXT:    pmaxub (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmaxub:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmaxub:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmaxub:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmaxub:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmaxub:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmaxub:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmaxub:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmaxub:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmaxub:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmaxub:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmaxub:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmaxub:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmaxub:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmaxub:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5848,42 +9465,84 @@ define <8 x i16> @test_pminsw(<8 x i16>
 ; SLM-NEXT:    pminsw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pminsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pminsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pminsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pminsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pminsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pminsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pminsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pminsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pminsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pminsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pminsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pminsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pminsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pminsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5919,42 +9578,84 @@ define <16 x i8> @test_pminub(<16 x i8>
 ; SLM-NEXT:    pminub (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pminub:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pminub:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pminub:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pminub:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pminub:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pminub:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pminub:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pminub:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pminub:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pminub:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pminub:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pminub:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pminub:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pminub:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5985,36 +9686,71 @@ define i32 @test_pmovmskb(<16 x i8> %a0)
 ; SLM-NEXT:    pmovmskb %xmm0, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovmskb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovmskb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovmskb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovmskb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovmskb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovmskb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovmskb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovmskb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovmskb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovmskb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovmskb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovmskb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovmskb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovmskb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovmskb %xmm0, %eax # sched: [1:1.00]
@@ -6043,42 +9779,84 @@ define <8 x i16> @test_pmulhuw(<8 x i16>
 ; SLM-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmulhuw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmulhuw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmulhuw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmulhuw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmulhuw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmulhuw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmulhuw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmulhuw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmulhuw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmulhuw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmulhuw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmulhuw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmulhuw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmulhuw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6110,42 +9888,84 @@ define <8 x i16> @test_pmulhw(<8 x i16>
 ; SLM-NEXT:    pmulhw (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmulhw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmulhw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmulhw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmulhw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmulhw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmulhw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmulhw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmulhw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmulhw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmulhw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmulhw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmulhw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmulhw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmulhw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6177,42 +9997,84 @@ define <8 x i16> @test_pmullw(<8 x i16>
 ; SLM-NEXT:    pmullw (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmullw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmullw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmullw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmullw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmullw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmullw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmullw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmullw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmullw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmullw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmullw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmullw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmullw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmullw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6243,42 +10105,84 @@ define <2 x i64> @test_pmuludq(<4 x i32>
 ; SLM-NEXT:    pmuludq (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmuludq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmuludq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmuludq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmuludq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmuludq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmuludq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmuludq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmuludq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmuludq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmuludq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmuludq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmuludq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmuludq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmuludq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6314,6 +10218,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_por:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_por:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6321,6 +10232,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_por:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_por:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6328,6 +10246,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_por:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    por (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_por:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6335,6 +10260,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_por:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_por:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6342,6 +10274,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_por:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_por:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6349,6 +10288,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_por:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    por (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_por:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6356,6 +10302,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_por:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    por (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_por:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -6392,42 +10345,84 @@ define <2 x i64> @test_psadbw(<16 x i8>
 ; SLM-NEXT:    psadbw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psadbw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psadbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psadbw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psadbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psadbw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psadbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psadbw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psadbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psadbw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psadbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SKX-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psadbw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psadbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psadbw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psadbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -6465,6 +10460,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pshufd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; SANDY-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pshufd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
@@ -6472,6 +10474,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pshufd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pshufd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6479,6 +10488,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pshufd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pshufd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6486,6 +10502,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pshufd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pshufd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6493,6 +10516,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pshufd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; SKX-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pshufd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6500,6 +10530,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pshufd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pshufd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
@@ -6507,6 +10544,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pshufd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pshufd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
@@ -6544,6 +10588,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pshufhw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; SANDY-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pshufhw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
@@ -6551,6 +10602,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pshufhw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pshufhw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6558,6 +10616,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pshufhw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pshufhw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6565,6 +10630,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pshufhw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pshufhw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6572,6 +10644,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pshufhw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SKX-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pshufhw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6579,6 +10658,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pshufhw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pshufhw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
@@ -6586,6 +10672,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pshufhw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pshufhw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
@@ -6623,6 +10716,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pshuflw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; SANDY-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pshuflw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
@@ -6630,6 +10730,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pshuflw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pshuflw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6637,6 +10744,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pshuflw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pshuflw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6644,6 +10758,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pshuflw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pshuflw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6651,6 +10772,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pshuflw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; SKX-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pshuflw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6658,6 +10786,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pshuflw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pshuflw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
@@ -6665,6 +10800,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pshuflw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pshuflw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
@@ -6700,6 +10842,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ; SLM-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pslld:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pslld:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6707,6 +10856,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ; SANDY-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pslld:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pslld:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6714,6 +10870,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ; HASWELL-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pslld:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pslld:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6721,6 +10884,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ; BROADWELL-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pslld:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pslld:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6728,6 +10898,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ; SKYLAKE-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pslld:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pslld:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6735,6 +10912,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ; SKX-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pslld:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pslld:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6742,6 +10926,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ; BTVER2-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pslld:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pslld:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6779,36 +10970,71 @@ define <4 x i32> @test_pslldq(<4 x i32>
 ; SLM-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pslldq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pslldq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pslldq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pslldq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pslldq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pslldq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pslldq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pslldq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pslldq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pslldq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pslldq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pslldq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pslldq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pslldq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
@@ -6839,6 +11065,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ; SLM-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psllq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psllq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6846,6 +11079,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ; SANDY-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psllq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psllq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6853,6 +11093,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ; HASWELL-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psllq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psllq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6860,6 +11107,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ; BROADWELL-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psllq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psllq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6867,6 +11121,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ; SKYLAKE-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psllq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psllq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6874,6 +11135,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ; SKX-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psllq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psllq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6881,6 +11149,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ; BTVER2-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psllq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psllq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6918,6 +11193,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ; SLM-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psllw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psllw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6925,6 +11207,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ; SANDY-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psllw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psllw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6932,6 +11221,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ; HASWELL-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psllw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psllw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6939,6 +11235,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ; BROADWELL-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psllw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psllw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6946,6 +11249,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ; SKYLAKE-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psllw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psllw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6953,6 +11263,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ; SKX-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psllw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psllw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6960,6 +11277,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ; BTVER2-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psllw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psllw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6997,6 +11321,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ; SLM-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psrad:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psrad:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7004,6 +11335,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ; SANDY-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psrad:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psrad:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7011,6 +11349,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ; HASWELL-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psrad:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psrad:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7018,6 +11363,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ; BROADWELL-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psrad:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psrad:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7025,6 +11377,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ; SKYLAKE-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psrad:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psrad:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7032,6 +11391,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ; SKX-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psrad:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psrad:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7039,6 +11405,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ; BTVER2-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psrad:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psrad:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7076,6 +11449,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ; SLM-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psraw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psraw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7083,6 +11463,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ; SANDY-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psraw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psraw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7090,6 +11477,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ; HASWELL-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psraw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psraw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7097,6 +11491,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ; BROADWELL-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psraw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psraw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7104,6 +11505,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ; SKYLAKE-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psraw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psraw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7111,6 +11519,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ; SKX-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psraw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psraw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7118,6 +11533,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ; BTVER2-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psraw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psraw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7155,6 +11577,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ; SLM-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psrld:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psrld:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7162,6 +11591,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ; SANDY-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psrld:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psrld:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7169,6 +11605,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ; HASWELL-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psrld:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psrld:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7176,6 +11619,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ; BROADWELL-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psrld:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psrld:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7183,6 +11633,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ; SKYLAKE-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psrld:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psrld:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7190,6 +11647,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ; SKX-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psrld:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psrld:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7197,6 +11661,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ; BTVER2-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psrld:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psrld:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7234,36 +11705,71 @@ define <4 x i32> @test_psrldq(<4 x i32>
 ; SLM-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psrldq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psrldq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psrldq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psrldq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psrldq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psrldq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psrldq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psrldq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psrldq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psrldq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psrldq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psrldq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psrldq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psrldq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
@@ -7294,6 +11800,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ; SLM-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psrlq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psrlq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7301,6 +11814,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ; SANDY-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psrlq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psrlq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7308,6 +11828,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ; HASWELL-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psrlq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psrlq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7315,6 +11842,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ; BROADWELL-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psrlq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psrlq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7322,6 +11856,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ; SKYLAKE-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psrlq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psrlq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7329,6 +11870,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ; SKX-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psrlq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psrlq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7336,6 +11884,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ; BTVER2-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psrlq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psrlq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7373,6 +11928,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ; SLM-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psrlw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psrlw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7380,6 +11942,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ; SANDY-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psrlw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psrlw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7387,6 +11956,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ; HASWELL-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psrlw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psrlw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7394,6 +11970,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ; BROADWELL-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psrlw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psrlw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7401,6 +11984,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ; SKYLAKE-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psrlw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psrlw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7408,6 +11998,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ; SKX-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psrlw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psrlw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7415,6 +12012,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ; BTVER2-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psrlw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psrlw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7453,42 +12057,84 @@ define <16 x i8> @test_psubb(<16 x i8> %
 ; SLM-NEXT:    psubb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psubb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psubb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psubb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psubb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psubb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psubb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psubb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psubb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psubb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psubb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psubb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psubb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psubb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psubb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7523,42 +12169,84 @@ define <4 x i32> @test_psubd(<4 x i32> %
 ; SLM-NEXT:    psubd (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psubd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psubd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psubd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psubd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psubd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psubd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psubd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psubd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psubd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psubd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psubd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psubd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psubd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psubd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7589,42 +12277,84 @@ define <2 x i64> @test_psubq(<2 x i64> %
 ; SLM-NEXT:    psubq (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psubq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psubq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psubq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psubq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psubq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psubq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psubq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psubq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psubq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psubq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psubq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psubq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psubq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psubq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7659,42 +12389,84 @@ define <16 x i8> @test_psubsb(<16 x i8>
 ; SLM-NEXT:    psubsb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psubsb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psubsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psubsb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psubsb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psubsb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psubsb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psubsb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psubsb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psubsb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psubsb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psubsb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psubsb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psubsb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psubsb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7730,42 +12502,84 @@ define <8 x i16> @test_psubsw(<8 x i16>
 ; SLM-NEXT:    psubsw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psubsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psubsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psubsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psubsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psubsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psubsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psubsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psubsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psubsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psubsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psubsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psubsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psubsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psubsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7801,42 +12615,84 @@ define <16 x i8> @test_psubusb(<16 x i8>
 ; SLM-NEXT:    psubusb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psubusb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psubusb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psubusb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psubusb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psubusb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psubusb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psubusb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psubusb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psubusb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psubusb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psubusb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psubusb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psubusb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psubusb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7872,42 +12728,84 @@ define <8 x i16> @test_psubusw(<8 x i16>
 ; SLM-NEXT:    psubusw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psubusw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psubusw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psubusw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psubusw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psubusw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psubusw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psubusw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psubusw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psubusw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psubusw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psubusw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psubusw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psubusw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psubusw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7943,42 +12841,84 @@ define <8 x i16> @test_psubw(<8 x i16> %
 ; SLM-NEXT:    psubw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psubw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psubw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psubw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psubw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psubw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psubw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psubw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psubw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psubw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psubw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psubw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psubw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psubw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psubw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -8013,42 +12953,84 @@ define <16 x i8> @test_punpckhbw(<16 x i
 ; SLM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_punpckhbw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; SANDY-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_punpckhbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
 ; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_punpckhbw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_punpckhbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
 ; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_punpckhbw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_punpckhbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
 ; BROADWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_punpckhbw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_punpckhbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_punpckhbw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; SKX-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_punpckhbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
 ; SKX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_punpckhbw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_punpckhbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
 ; BTVER2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_punpckhbw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_punpckhbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
@@ -8084,6 +13066,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_punpckhdq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; SANDY-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_punpckhdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -8091,6 +13080,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_punpckhdq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_punpckhdq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8098,6 +13094,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_punpckhdq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_punpckhdq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8105,6 +13108,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_punpckhdq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_punpckhdq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8112,6 +13122,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_punpckhdq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_punpckhdq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8119,6 +13136,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_punpckhdq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_punpckhdq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -8126,6 +13150,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_punpckhdq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_punpckhdq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
@@ -8161,6 +13192,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_punpckhqdq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_punpckhqdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8168,6 +13206,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_punpckhqdq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_punpckhqdq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8175,6 +13220,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_punpckhqdq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_punpckhqdq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8182,6 +13234,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_punpckhqdq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_punpckhqdq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8189,6 +13248,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_punpckhqdq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_punpckhqdq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8196,6 +13262,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_punpckhqdq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_punpckhqdq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8203,6 +13276,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_punpckhqdq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_punpckhqdq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
@@ -8239,42 +13319,84 @@ define <8 x i16> @test_punpckhwd(<8 x i1
 ; SLM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_punpckhwd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_punpckhwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
 ; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_punpckhwd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_punpckhwd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
 ; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_punpckhwd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_punpckhwd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
 ; BROADWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_punpckhwd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_punpckhwd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_punpckhwd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_punpckhwd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
 ; SKX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_punpckhwd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_punpckhwd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
 ; BTVER2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_punpckhwd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_punpckhwd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
@@ -8309,42 +13431,84 @@ define <16 x i8> @test_punpcklbw(<16 x i
 ; SLM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_punpcklbw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_punpcklbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
 ; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_punpcklbw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_punpcklbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
 ; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_punpcklbw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_punpcklbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
 ; BROADWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_punpcklbw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_punpcklbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_punpcklbw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_punpcklbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
 ; SKX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_punpcklbw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_punpcklbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
 ; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_punpcklbw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_punpcklbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
@@ -8380,6 +13544,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_punpckldq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_punpckldq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8387,6 +13558,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_punpckldq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_punpckldq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8394,6 +13572,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_punpckldq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_punpckldq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8401,6 +13586,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_punpckldq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_punpckldq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8408,6 +13600,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_punpckldq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_punpckldq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8415,6 +13614,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_punpckldq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_punpckldq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8422,6 +13628,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_punpckldq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_punpckldq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
@@ -8457,6 +13670,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_punpcklqdq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; SANDY-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_punpcklqdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -8464,6 +13684,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_punpcklqdq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_punpcklqdq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8471,6 +13698,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_punpcklqdq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_punpcklqdq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8478,6 +13712,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_punpcklqdq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_punpcklqdq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8485,6 +13726,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_punpcklqdq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_punpcklqdq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8492,6 +13740,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_punpcklqdq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_punpcklqdq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -8499,6 +13754,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_punpcklqdq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_punpcklqdq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
@@ -8535,42 +13797,84 @@ define <8 x i16> @test_punpcklwd(<8 x i1
 ; SLM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_punpcklwd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; SANDY-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_punpcklwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
 ; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_punpcklwd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_punpcklwd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_punpcklwd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_punpcklwd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; BROADWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_punpcklwd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_punpcklwd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_punpcklwd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_punpcklwd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; SKX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_punpcklwd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_punpcklwd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
 ; BTVER2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_punpcklwd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_punpcklwd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
@@ -8604,6 +13908,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pxor:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pxor:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8611,6 +13922,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pxor:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pxor:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8618,6 +13936,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pxor:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pxor:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8625,6 +13950,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pxor:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pxor:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8632,6 +13964,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pxor:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pxor:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8639,6 +13978,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pxor:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pxor:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -8646,6 +13992,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pxor:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pxor:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -8681,6 +14034,13 @@ define <2 x double> @test_shufpd(<2 x do
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_shufpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SANDY-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_shufpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8688,6 +14048,13 @@ define <2 x double> @test_shufpd(<2 x do
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_shufpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_shufpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8695,6 +14062,13 @@ define <2 x double> @test_shufpd(<2 x do
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_shufpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_shufpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8702,6 +14076,13 @@ define <2 x double> @test_shufpd(<2 x do
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_shufpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_shufpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8709,6 +14090,13 @@ define <2 x double> @test_shufpd(<2 x do
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_shufpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_shufpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8716,6 +14104,13 @@ define <2 x double> @test_shufpd(<2 x do
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_shufpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_shufpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
@@ -8723,6 +14118,13 @@ define <2 x double> @test_shufpd(<2 x do
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_shufpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_shufpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
@@ -8759,6 +14161,13 @@ define <2 x double> @test_sqrtpd(<2 x do
 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_sqrtpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [22:1.00]
+; SANDY-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_sqrtpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [22:1.00]
@@ -8766,6 +14175,13 @@ define <2 x double> @test_sqrtpd(<2 x do
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_sqrtpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; HASWELL-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [26:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_sqrtpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
@@ -8773,6 +14189,13 @@ define <2 x double> @test_sqrtpd(<2 x do
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_sqrtpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [25:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_sqrtpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
@@ -8780,6 +14203,13 @@ define <2 x double> @test_sqrtpd(<2 x do
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_sqrtpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [26:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_sqrtpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [18:1.00]
@@ -8787,6 +14217,13 @@ define <2 x double> @test_sqrtpd(<2 x do
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_sqrtpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [18:1.00]
+; SKX-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [24:1.00]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_sqrtpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [18:1.00]
@@ -8794,6 +14231,13 @@ define <2 x double> @test_sqrtpd(<2 x do
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_sqrtpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
+; BTVER2-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_sqrtpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [26:21.00]
@@ -8801,6 +14245,13 @@ define <2 x double> @test_sqrtpd(<2 x do
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_sqrtpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; ZNVER1-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_sqrtpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [27:1.00]
@@ -8842,6 +14293,14 @@ define <2 x double> @test_sqrtsd(<2 x do
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_sqrtsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [22:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_sqrtsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8850,6 +14309,14 @@ define <2 x double> @test_sqrtsd(<2 x do
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_sqrtsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_sqrtsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8858,6 +14325,14 @@ define <2 x double> @test_sqrtsd(<2 x do
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_sqrtsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_sqrtsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8866,6 +14341,14 @@ define <2 x double> @test_sqrtsd(<2 x do
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_sqrtsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_sqrtsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
@@ -8874,6 +14357,14 @@ define <2 x double> @test_sqrtsd(<2 x do
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_sqrtsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [18:1.00]
+; SKX-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SKX-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [18:1.00]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_sqrtsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
@@ -8882,6 +14373,14 @@ define <2 x double> @test_sqrtsd(<2 x do
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_sqrtsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [26:21.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_sqrtsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovapd (%rdi), %xmm1 # sched: [5:1.00]
@@ -8890,6 +14389,14 @@ define <2 x double> @test_sqrtsd(<2 x do
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_sqrtsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_sqrtsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovapd (%rdi), %xmm1 # sched: [8:0.50]
@@ -8924,42 +14431,84 @@ define <2 x double> @test_subpd(<2 x dou
 ; SLM-NEXT:    subpd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_subpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_subpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_subpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_subpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_subpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_subpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_subpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_subpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_subpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_subpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_subpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_subpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_subpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_subpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -8990,42 +14539,84 @@ define double @test_subsd(double %a0, do
 ; SLM-NEXT:    subsd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_subsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_subsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_subsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_subsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_subsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_subsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_subsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_subsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_subsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_subsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_subsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_subsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_subsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_subsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -9080,6 +14671,20 @@ define i32 @test_ucomisd(<2 x double> %a
 ; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_ucomisd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_ucomisd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9094,6 +14699,20 @@ define i32 @test_ucomisd(<2 x double> %a
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_ucomisd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_ucomisd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9108,6 +14727,20 @@ define i32 @test_ucomisd(<2 x double> %a
 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_ucomisd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_ucomisd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9122,6 +14755,20 @@ define i32 @test_ucomisd(<2 x double> %a
 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_ucomisd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_ucomisd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9136,6 +14783,20 @@ define i32 @test_ucomisd(<2 x double> %a
 ; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_ucomisd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_ucomisd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9150,6 +14811,20 @@ define i32 @test_ucomisd(<2 x double> %a
 ; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_ucomisd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_ucomisd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9164,6 +14839,20 @@ define i32 @test_ucomisd(<2 x double> %a
 ; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_ucomisd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_ucomisd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9207,6 +14896,13 @@ define <2 x double> @test_unpckhpd(<2 x
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_unpckhpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_unpckhpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9214,6 +14910,13 @@ define <2 x double> @test_unpckhpd(<2 x
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_unpckhpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_unpckhpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9221,6 +14924,13 @@ define <2 x double> @test_unpckhpd(<2 x
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_unpckhpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_unpckhpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9228,6 +14938,13 @@ define <2 x double> @test_unpckhpd(<2 x
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_unpckhpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_unpckhpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9235,6 +14952,13 @@ define <2 x double> @test_unpckhpd(<2 x
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_unpckhpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_unpckhpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9242,6 +14966,13 @@ define <2 x double> @test_unpckhpd(<2 x
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_unpckhpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_unpckhpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -9249,6 +14980,13 @@ define <2 x double> @test_unpckhpd(<2 x
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_unpckhpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_unpckhpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -9290,6 +15028,15 @@ define <2 x double> @test_unpcklpd(<2 x
 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_unpcklpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_unpcklpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9297,6 +15044,15 @@ define <2 x double> @test_unpcklpd(<2 x
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_unpcklpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_unpcklpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9304,6 +15060,15 @@ define <2 x double> @test_unpcklpd(<2 x
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_unpcklpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_unpcklpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9311,6 +15076,15 @@ define <2 x double> @test_unpcklpd(<2 x
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_unpcklpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_unpcklpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9318,6 +15092,15 @@ define <2 x double> @test_unpcklpd(<2 x
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_unpcklpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_unpcklpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9325,6 +15108,15 @@ define <2 x double> @test_unpcklpd(<2 x
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_unpcklpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_unpcklpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -9332,6 +15124,15 @@ define <2 x double> @test_unpcklpd(<2 x
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_unpcklpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_unpcklpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -9367,6 +15168,13 @@ define <2 x double> @test_xorpd(<2 x dou
 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_xorpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_xorpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9374,6 +15182,13 @@ define <2 x double> @test_xorpd(<2 x dou
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_xorpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_xorpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9381,6 +15196,13 @@ define <2 x double> @test_xorpd(<2 x dou
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_xorpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_xorpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9388,6 +15210,13 @@ define <2 x double> @test_xorpd(<2 x dou
 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_xorpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_xorpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -9395,6 +15224,13 @@ define <2 x double> @test_xorpd(<2 x dou
 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_xorpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_xorpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -9402,6 +15238,13 @@ define <2 x double> @test_xorpd(<2 x dou
 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_xorpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_xorpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -9409,6 +15252,13 @@ define <2 x double> @test_xorpd(<2 x dou
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_xorpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_xorpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]

Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,15 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,ZNVER1
 
 define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_addsubpd:
@@ -30,42 +38,84 @@ define <2 x double> @test_addsubpd(<2 x
 ; SLM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_addsubpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addsubpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_addsubpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_addsubpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_addsubpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_addsubpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_addsubpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_addsubpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_addsubpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_addsubpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_addsubpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_addsubpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_addsubpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_addsubpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -97,42 +147,84 @@ define <4 x float> @test_addsubps(<4 x f
 ; SLM-NEXT:    addsubps (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_addsubps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addsubps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_addsubps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_addsubps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_addsubps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_addsubps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_addsubps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_addsubps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_addsubps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_addsubps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_addsubps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_addsubps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_addsubps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_addsubps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -164,42 +256,84 @@ define <2 x double> @test_haddpd(<2 x do
 ; SLM-NEXT:    haddpd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_haddpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; SANDY-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_haddpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; SANDY-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_haddpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; HASWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_haddpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; HASWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_haddpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_haddpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; BROADWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_haddpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_haddpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
 ; SKYLAKE-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_haddpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
+; SKX-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_haddpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
 ; SKX-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_haddpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_haddpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_haddpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_haddpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -231,42 +365,84 @@ define <4 x float> @test_haddps(<4 x flo
 ; SLM-NEXT:    haddps (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_haddps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
+; SANDY-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_haddps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; SANDY-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_haddps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
+; HASWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_haddps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; HASWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_haddps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_haddps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; BROADWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_haddps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_haddps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
 ; SKYLAKE-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_haddps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
+; SKX-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_haddps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
 ; SKX-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_haddps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_haddps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_haddps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_haddps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -298,42 +474,84 @@ define <2 x double> @test_hsubpd(<2 x do
 ; SLM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_hsubpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; SANDY-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_hsubpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; SANDY-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_hsubpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; HASWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_hsubpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; HASWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_hsubpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_hsubpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; BROADWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_hsubpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_hsubpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
 ; SKYLAKE-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_hsubpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
+; SKX-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_hsubpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
 ; SKX-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_hsubpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_hsubpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_hsubpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_hsubpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -365,42 +583,84 @@ define <4 x float> @test_hsubps(<4 x flo
 ; SLM-NEXT:    hsubps (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_hsubps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; SANDY-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_hsubps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; SANDY-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_hsubps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; HASWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_hsubps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; HASWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_hsubps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_hsubps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
 ; BROADWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_hsubps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_hsubps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
 ; SKYLAKE-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_hsubps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
+; SKX-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_hsubps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
 ; SKX-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_hsubps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_hsubps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_hsubps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_hsubps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -431,36 +691,71 @@ define <16 x i8> @test_lddqu(i8* %a0) {
 ; SLM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_lddqu:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_lddqu:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_lddqu:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_lddqu:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_lddqu:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_lddqu:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_lddqu:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_lddqu:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_lddqu:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_lddqu:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_lddqu:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_lddqu:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_lddqu:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_lddqu:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vlddqu (%rdi), %xmm0 # sched: [8:0.50]
@@ -492,6 +787,13 @@ define void @test_monitor(i8* %a0, i32 %
 ; SLM-NEXT:    monitor # sched: [100:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_monitor:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
+; SANDY-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.33]
+; SANDY-SSE-NEXT:    monitor # sched: [100:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_monitor:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
@@ -499,6 +801,13 @@ define void @test_monitor(i8* %a0, i32 %
 ; SANDY-NEXT:    monitor # sched: [100:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_monitor:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    monitor # sched: [100:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_monitor:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
@@ -506,6 +815,13 @@ define void @test_monitor(i8* %a0, i32 %
 ; HASWELL-NEXT:    monitor # sched: [100:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_monitor:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    monitor # sched: [100:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_monitor:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
@@ -513,6 +829,13 @@ define void @test_monitor(i8* %a0, i32 %
 ; BROADWELL-NEXT:    monitor # sched: [100:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_monitor:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    monitor # sched: [100:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_monitor:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
@@ -520,6 +843,13 @@ define void @test_monitor(i8* %a0, i32 %
 ; SKYLAKE-NEXT:    monitor # sched: [100:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_monitor:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
+; SKX-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; SKX-SSE-NEXT:    monitor # sched: [100:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_monitor:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
@@ -527,6 +857,13 @@ define void @test_monitor(i8* %a0, i32 %
 ; SKX-NEXT:    monitor # sched: [100:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_monitor:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    monitor # sched: [100:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_monitor:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
@@ -534,6 +871,13 @@ define void @test_monitor(i8* %a0, i32 %
 ; BTVER2-NEXT:    monitor # sched: [100:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_monitor:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    monitor # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_monitor:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
@@ -569,6 +913,13 @@ define <2 x double> @test_movddup(<2 x d
 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movddup:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
+; SANDY-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movddup:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -576,6 +927,13 @@ define <2 x double> @test_movddup(<2 x d
 ; SANDY-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movddup:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
+; HASWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movddup:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -583,6 +941,13 @@ define <2 x double> @test_movddup(<2 x d
 ; HASWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movddup:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movddup:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -590,6 +955,13 @@ define <2 x double> @test_movddup(<2 x d
 ; BROADWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movddup:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
+; SKYLAKE-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movddup:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -597,6 +969,13 @@ define <2 x double> @test_movddup(<2 x d
 ; SKYLAKE-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movddup:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; SKX-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
+; SKX-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movddup:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -604,6 +983,13 @@ define <2 x double> @test_movddup(<2 x d
 ; SKX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movddup:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movddup:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
@@ -611,6 +997,13 @@ define <2 x double> @test_movddup(<2 x d
 ; BTVER2-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movddup:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movddup:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
@@ -648,6 +1041,13 @@ define <4 x float> @test_movshdup(<4 x f
 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movshdup:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movshdup:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -655,6 +1055,13 @@ define <4 x float> @test_movshdup(<4 x f
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movshdup:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movshdup:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -662,6 +1069,13 @@ define <4 x float> @test_movshdup(<4 x f
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movshdup:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movshdup:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -669,6 +1083,13 @@ define <4 x float> @test_movshdup(<4 x f
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movshdup:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movshdup:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -676,6 +1097,13 @@ define <4 x float> @test_movshdup(<4 x f
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movshdup:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movshdup:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -683,6 +1111,13 @@ define <4 x float> @test_movshdup(<4 x f
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movshdup:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movshdup:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
@@ -690,6 +1125,13 @@ define <4 x float> @test_movshdup(<4 x f
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movshdup:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movshdup:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
@@ -727,6 +1169,13 @@ define <4 x float> @test_movsldup(<4 x f
 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movsldup:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movsldup:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -734,6 +1183,13 @@ define <4 x float> @test_movsldup(<4 x f
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movsldup:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movsldup:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -741,6 +1197,13 @@ define <4 x float> @test_movsldup(<4 x f
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movsldup:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movsldup:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -748,6 +1211,13 @@ define <4 x float> @test_movsldup(<4 x f
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movsldup:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movsldup:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -755,6 +1225,13 @@ define <4 x float> @test_movsldup(<4 x f
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movsldup:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movsldup:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -762,6 +1239,13 @@ define <4 x float> @test_movsldup(<4 x f
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movsldup:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movsldup:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
@@ -769,6 +1253,13 @@ define <4 x float> @test_movsldup(<4 x f
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movsldup:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:?]
+; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:?]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movsldup:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
@@ -804,6 +1295,13 @@ define void @test_mwait(i32 %a0, i32 %a1
 ; SLM-NEXT:    mwait # sched: [100:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_mwait:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movl %esi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    mwait # sched: [100:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mwait:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    movl %edi, %ecx # sched: [1:0.33]
@@ -811,6 +1309,13 @@ define void @test_mwait(i32 %a0, i32 %a1
 ; SANDY-NEXT:    mwait # sched: [100:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_mwait:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    mwait # sched: [20:2.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_mwait:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
@@ -818,6 +1323,13 @@ define void @test_mwait(i32 %a0, i32 %a1
 ; HASWELL-NEXT:    mwait # sched: [20:2.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_mwait:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    mwait # sched: [100:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_mwait:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
@@ -825,6 +1337,13 @@ define void @test_mwait(i32 %a0, i32 %a1
 ; BROADWELL-NEXT:    mwait # sched: [100:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_mwait:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    mwait # sched: [20:2.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_mwait:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
@@ -832,6 +1351,13 @@ define void @test_mwait(i32 %a0, i32 %a1
 ; SKYLAKE-NEXT:    mwait # sched: [20:2.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_mwait:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
+; SKX-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    mwait # sched: [20:2.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_mwait:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    movl %edi, %ecx # sched: [1:0.25]
@@ -839,6 +1365,13 @@ define void @test_mwait(i32 %a0, i32 %a1
 ; SKX-NEXT:    mwait # sched: [20:2.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_mwait:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movl %esi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    mwait # sched: [100:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_mwait:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
@@ -846,6 +1379,13 @@ define void @test_mwait(i32 %a0, i32 %a1
 ; BTVER2-NEXT:    mwait # sched: [100:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_mwait:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    mwait # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_mwait:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,14 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,ZNVER1
 
 define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_blendpd:
@@ -25,6 +33,13 @@ define <2 x double> @test_blendpd(<2 x d
 ; SLM-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_blendpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_blendpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -32,6 +47,13 @@ define <2 x double> @test_blendpd(<2 x d
 ; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_blendpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_blendpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -39,6 +61,13 @@ define <2 x double> @test_blendpd(<2 x d
 ; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_blendpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_blendpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -46,6 +75,13 @@ define <2 x double> @test_blendpd(<2 x d
 ; BROADWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_blendpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_blendpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -53,14 +89,27 @@ define <2 x double> @test_blendpd(<2 x d
 ; SKYLAKE-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_blendpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_blendpd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
-; SKX-NEXT:    vmovapd (%rdi), %xmm2 # sched: [6:0.50]
+; SKX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
-; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00]
+; SKX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_blendpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_blendpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -68,6 +117,13 @@ define <2 x double> @test_blendpd(<2 x d
 ; BTVER2-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_blendpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_blendpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -96,6 +152,13 @@ define <4 x float> @test_blendps(<4 x fl
 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_blendps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; SANDY-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_blendps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -103,6 +166,13 @@ define <4 x float> @test_blendps(<4 x fl
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_blendps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; HASWELL-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_blendps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -110,6 +180,13 @@ define <4 x float> @test_blendps(<4 x fl
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_blendps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_blendps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -117,6 +194,13 @@ define <4 x float> @test_blendps(<4 x fl
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_blendps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_blendps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -124,6 +208,13 @@ define <4 x float> @test_blendps(<4 x fl
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_blendps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; SKX-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_blendps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -131,6 +222,13 @@ define <4 x float> @test_blendps(<4 x fl
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_blendps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_blendps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -138,6 +236,13 @@ define <4 x float> @test_blendps(<4 x fl
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_blendps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_blendps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -170,42 +275,105 @@ define <2 x double> @test_blendvpd(<2 x
 ; SLM-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_blendvpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_blendvpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; SANDY-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_blendvpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_blendvpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; HASWELL-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_blendvpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_blendvpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; BROADWELL-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_blendvpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_blendvpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
 ; SKYLAKE-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_blendvpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_blendvpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
 ; SKX-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_blendvpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_blendvpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; BTVER2-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_blendvpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [3:0.33]
+; ZNVER1-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [11:0.67]
+; ZNVER1-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_blendvpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -237,42 +405,105 @@ define <4 x float> @test_blendvps(<4 x f
 ; SLM-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_blendvps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_blendvps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; SANDY-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_blendvps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_blendvps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; HASWELL-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_blendvps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_blendvps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; BROADWELL-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_blendvps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_blendvps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
 ; SKYLAKE-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_blendvps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_blendvps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
 ; SKX-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_blendvps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_blendvps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; BTVER2-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_blendvps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [3:0.33]
+; ZNVER1-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [11:0.67]
+; ZNVER1-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_blendvps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -298,42 +529,84 @@ define <2 x double> @test_dppd(<2 x doub
 ; SLM-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_dppd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_dppd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_dppd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_dppd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
 ; HASWELL-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_dppd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_dppd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
 ; BROADWELL-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_dppd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_dppd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
 ; SKYLAKE-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_dppd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_dppd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
 ; SKX-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_dppd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
+; BTVER2-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [14:3.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_dppd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00]
 ; BTVER2-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_dppd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_dppd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -359,42 +632,84 @@ define <4 x float> @test_dpps(<4 x float
 ; SLM-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_dpps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
+; SANDY-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_dpps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
 ; SANDY-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_dpps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
+; HASWELL-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [20:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_dpps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
 ; HASWELL-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_dpps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
+; BROADWELL-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_dpps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
 ; BROADWELL-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_dpps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [13:1.50]
+; SKYLAKE-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:1.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_dpps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50]
 ; SKYLAKE-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_dpps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [13:1.33]
+; SKX-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:1.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_dpps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
 ; SKX-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_dpps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BTVER2-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [16:3.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_dpps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
 ; BTVER2-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_dpps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_dpps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -420,42 +735,84 @@ define i32 @test_extractps(<4 x float> %
 ; SLM-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [4:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_extractps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_extractps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vextractps $3, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_extractps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_extractps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vextractps $3, %xmm0, %eax # sched: [2:1.00]
 ; HASWELL-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_extractps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_extractps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vextractps $3, %xmm0, %eax # sched: [2:1.00]
 ; BROADWELL-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_extractps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_extractps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vextractps $3, %xmm0, %eax # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_extractps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_extractps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vextractps $3, %xmm0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_extractps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_extractps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vextractps $3, %xmm0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_extractps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [5:2.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_extractps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vextractps $3, %xmm0, %eax # sched: [2:2.00]
@@ -482,42 +839,84 @@ define <4 x float> @test_insertps(<4 x f
 ; SLM-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_insertps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SANDY-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_insertps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
 ; SANDY-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_insertps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_insertps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
 ; HASWELL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_insertps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_insertps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
 ; BROADWELL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_insertps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_insertps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
 ; SKYLAKE-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_insertps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SKX-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_insertps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
 ; SKX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_insertps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_insertps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
 ; BTVER2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_insertps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_insertps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
@@ -541,36 +940,71 @@ define <2 x i64> @test_movntdqa(i8* %a0)
 ; SLM-NEXT:    movntdqa (%rdi), %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_movntdqa:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movntdqa:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_movntdqa:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_movntdqa:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_movntdqa:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_movntdqa:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_movntdqa:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_movntdqa:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_movntdqa:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_movntdqa:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_movntdqa:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_movntdqa:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [5:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_movntdqa:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_movntdqa:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [8:0.50]
@@ -593,42 +1027,84 @@ define <8 x i16> @test_mpsadbw(<16 x i8>
 ; SLM-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_mpsadbw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mpsadbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_mpsadbw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
+; HASWELL-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_mpsadbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
 ; HASWELL-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_mpsadbw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_mpsadbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
 ; BROADWELL-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_mpsadbw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
+; SKYLAKE-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_mpsadbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
 ; SKYLAKE-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_mpsadbw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
+; SKX-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_mpsadbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
 ; SKX-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_mpsadbw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_mpsadbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_mpsadbw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_mpsadbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -655,42 +1131,84 @@ define <8 x i16> @test_packusdw(<4 x i32
 ; SLM-NEXT:    packusdw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_packusdw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_packusdw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_packusdw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_packusdw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_packusdw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_packusdw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_packusdw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_packusdw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_packusdw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_packusdw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKX-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_packusdw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_packusdw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_packusdw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_packusdw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -723,42 +1241,105 @@ define <16 x i8> @test_pblendvb(<16 x i8
 ; SLM-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pblendvb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pblendvb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; SANDY-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pblendvb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pblendvb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; HASWELL-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pblendvb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pblendvb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; BROADWELL-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pblendvb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pblendvb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
 ; SKYLAKE-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pblendvb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pblendvb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
 ; SKX-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pblendvb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pblendvb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; BTVER2-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pblendvb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pblendvb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -786,6 +1367,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; SLM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pblendw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pblendw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
@@ -793,6 +1381,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pblendw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pblendw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -800,6 +1395,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pblendw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pblendw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -807,6 +1409,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pblendw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pblendw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -814,6 +1423,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pblendw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pblendw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -821,6 +1437,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pblendw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pblendw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
@@ -828,6 +1451,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pblendw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
+; ZNVER1-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pblendw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
@@ -854,42 +1484,84 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
 ; SLM-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpeqq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpeqq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpeqq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpeqq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpeqq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpeqq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpeqq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpeqq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpeqq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpeqq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpeqq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpeqq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpeqq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpeqq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -916,42 +1588,84 @@ define i32 @test_pextrb(<16 x i8> %a0, i
 ; SLM-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [4:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pextrb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pextrb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpextrb $3, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pextrb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pextrb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpextrb $3, %xmm0, %eax # sched: [2:1.00]
 ; HASWELL-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pextrb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pextrb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpextrb $3, %xmm0, %eax # sched: [2:1.00]
 ; BROADWELL-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pextrb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pextrb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpextrb $3, %xmm0, %eax # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pextrb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pextrb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpextrb $3, %xmm0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pextrb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pextrb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpextrb $3, %xmm0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pextrb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pextrb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpextrb $3, %xmm0, %eax # sched: [2:2.00]
@@ -979,6 +1693,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ; SLM-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pextrd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pextrd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -986,6 +1707,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ; SANDY-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pextrd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pextrd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -993,6 +1721,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ; HASWELL-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pextrd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pextrd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -1000,6 +1735,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ; BROADWELL-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pextrd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pextrd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
@@ -1007,6 +1749,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ; SKYLAKE-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pextrd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pextrd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
@@ -1014,6 +1763,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pextrd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pextrd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -1021,6 +1777,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ; BTVER2-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pextrd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pextrd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1047,42 +1810,84 @@ define i64 @test_pextrq(<2 x i64> %a0, <
 ; SLM-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [4:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pextrq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pextrq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpextrq $1, %xmm0, %rax # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pextrq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pextrq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpextrq $1, %xmm0, %rax # sched: [2:1.00]
 ; HASWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pextrq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pextrq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpextrq $1, %xmm0, %rax # sched: [2:1.00]
 ; BROADWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pextrq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pextrq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpextrq $1, %xmm0, %rax # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pextrq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SKX-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pextrq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpextrq $1, %xmm0, %rax # sched: [3:1.00]
 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pextrq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pextrq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpextrq $1, %xmm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pextrq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pextrq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpextrq $1, %xmm0, %rax # sched: [2:2.00]
@@ -1107,42 +1912,84 @@ define i32 @test_pextrw(<8 x i16> %a0, i
 ; SLM-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [4:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pextrw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pextrw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpextrw $3, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pextrw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pextrw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpextrw $3, %xmm0, %eax # sched: [2:1.00]
 ; HASWELL-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pextrw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pextrw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpextrw $3, %xmm0, %eax # sched: [2:1.00]
 ; BROADWELL-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pextrw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pextrw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpextrw $3, %xmm0, %eax # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pextrw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pextrw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpextrw $3, %xmm0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pextrw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pextrw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpextrw $3, %xmm0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pextrw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pextrw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpextrw $3, %xmm0, %eax # sched: [2:2.00]
@@ -1168,42 +2015,84 @@ define <8 x i16> @test_phminposuw(<8 x i
 ; SLM-NEXT:    phminposuw %xmm0, %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_phminposuw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_phminposuw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_phminposuw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_phminposuw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_phminposuw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_phminposuw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_phminposuw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_phminposuw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_phminposuw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_phminposuw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_phminposuw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_phminposuw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_phminposuw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_phminposuw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
@@ -1229,42 +2118,84 @@ define <16 x i8> @test_pinsrb(<16 x i8>
 ; SLM-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pinsrb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pinsrb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
 ; SANDY-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pinsrb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pinsrb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; HASWELL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pinsrb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pinsrb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; BROADWELL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pinsrb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pinsrb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; SKYLAKE-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pinsrb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pinsrb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pinsrb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pinsrb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pinsrb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pinsrb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1289,42 +2220,84 @@ define <4 x i32> @test_pinsrd(<4 x i32>
 ; SLM-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pinsrd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pinsrd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
 ; SANDY-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pinsrd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pinsrd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; HASWELL-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pinsrd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pinsrd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; BROADWELL-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pinsrd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pinsrd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; SKYLAKE-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pinsrd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pinsrd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
 ; SKX-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pinsrd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pinsrd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pinsrd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pinsrd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1351,6 +2324,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pinsrq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pinsrq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1358,6 +2338,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pinsrq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pinsrq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1365,6 +2352,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pinsrq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pinsrq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1372,6 +2366,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pinsrq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pinsrq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1379,6 +2380,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pinsrq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pinsrq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1386,6 +2394,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pinsrq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pinsrq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
@@ -1393,6 +2408,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pinsrq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pinsrq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50]
@@ -1419,42 +2441,84 @@ define <16 x i8> @test_pmaxsb(<16 x i8>
 ; SLM-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmaxsb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmaxsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmaxsb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmaxsb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmaxsb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmaxsb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmaxsb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmaxsb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmaxsb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmaxsb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmaxsb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmaxsb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmaxsb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmaxsb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1480,42 +2544,84 @@ define <4 x i32> @test_pmaxsd(<4 x i32>
 ; SLM-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmaxsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmaxsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmaxsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmaxsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmaxsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmaxsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmaxsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmaxsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmaxsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmaxsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmaxsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmaxsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmaxsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmaxsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1541,42 +2647,84 @@ define <4 x i32> @test_pmaxud(<4 x i32>
 ; SLM-NEXT:    pmaxud (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmaxud:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmaxud:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmaxud:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmaxud:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmaxud:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmaxud:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmaxud:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmaxud:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmaxud:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmaxud:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmaxud:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmaxud:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmaxud:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmaxud:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1602,42 +2750,84 @@ define <8 x i16> @test_pmaxuw(<8 x i16>
 ; SLM-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmaxuw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmaxuw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmaxuw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmaxuw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmaxuw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmaxuw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmaxuw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmaxuw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmaxuw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmaxuw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmaxuw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmaxuw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmaxuw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmaxuw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1663,42 +2853,84 @@ define <16 x i8> @test_pminsb(<16 x i8>
 ; SLM-NEXT:    pminsb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pminsb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pminsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pminsb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pminsb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pminsb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pminsb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pminsb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pminsb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pminsb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pminsb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pminsb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pminsb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pminsb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pminsb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1724,42 +2956,84 @@ define <4 x i32> @test_pminsd(<4 x i32>
 ; SLM-NEXT:    pminsd (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pminsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pminsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pminsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pminsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pminsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pminsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pminsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pminsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pminsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pminsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pminsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pminsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pminsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pminsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1785,42 +3059,84 @@ define <4 x i32> @test_pminud(<4 x i32>
 ; SLM-NEXT:    pminud (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pminud:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pminud:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pminud:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pminud:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pminud:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pminud:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pminud:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pminud:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pminud:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pminud:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pminud:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pminud:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pminud:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pminud:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1846,42 +3162,84 @@ define <8 x i16> @test_pminuw(<8 x i16>
 ; SLM-NEXT:    pminuw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pminuw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pminuw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pminuw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pminuw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pminuw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pminuw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pminuw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pminuw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pminuw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pminuw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pminuw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pminuw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pminuw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pminuw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1910,6 +3268,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovsxbw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovsxbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
@@ -1917,6 +3282,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovsxbw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovsxbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1924,6 +3296,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovsxbw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovsxbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1931,6 +3310,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovsxbw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovsxbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1938,6 +3324,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovsxbw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovsxbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1945,6 +3338,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovsxbw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovsxbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
@@ -1952,6 +3352,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovsxbw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovsxbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50]
@@ -1982,6 +3389,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovsxbd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovsxbd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
@@ -1989,6 +3403,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovsxbd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovsxbd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -1996,6 +3417,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovsxbd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovsxbd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2003,6 +3431,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovsxbd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovsxbd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2010,6 +3445,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovsxbd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovsxbd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2017,6 +3459,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovsxbd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovsxbd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
@@ -2024,6 +3473,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovsxbd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovsxbd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50]
@@ -2054,6 +3510,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovsxbq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovsxbq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2061,6 +3524,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovsxbq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovsxbq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2068,6 +3538,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovsxbq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovsxbq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2075,6 +3552,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovsxbq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovsxbq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2082,6 +3566,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovsxbq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovsxbq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2089,6 +3580,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovsxbq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovsxbq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2096,6 +3594,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovsxbq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovsxbq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2126,6 +3631,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovsxdq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovsxdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2133,6 +3645,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovsxdq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovsxdq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2140,6 +3659,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovsxdq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovsxdq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2147,6 +3673,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovsxdq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovsxdq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2154,6 +3687,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovsxdq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovsxdq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2161,6 +3701,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovsxdq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovsxdq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2168,6 +3715,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovsxdq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovsxdq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2198,6 +3752,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovsxwd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovsxwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
@@ -2205,6 +3766,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovsxwd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovsxwd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2212,6 +3780,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovsxwd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovsxwd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2219,6 +3794,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovsxwd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovsxwd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2226,6 +3808,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovsxwd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovsxwd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2233,6 +3822,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovsxwd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovsxwd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
@@ -2240,6 +3836,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovsxwd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovsxwd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50]
@@ -2270,6 +3873,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovsxwq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovsxwq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2277,6 +3887,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovsxwq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovsxwq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2284,6 +3901,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovsxwq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovsxwq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2291,6 +3915,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovsxwq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovsxwq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2298,6 +3929,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovsxwq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovsxwq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2305,6 +3943,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovsxwq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovsxwq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2312,6 +3957,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovsxwq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovsxwq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2342,6 +3994,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovzxbw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovzxbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
@@ -2349,6 +4008,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovzxbw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovzxbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2356,6 +4022,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovzxbw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovzxbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2363,6 +4036,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovzxbw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovzxbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2370,6 +4050,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovzxbw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovzxbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2377,6 +4064,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovzxbw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovzxbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
@@ -2384,6 +4078,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovzxbw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovzxbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
@@ -2414,6 +4115,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovzxbd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovzxbd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
@@ -2421,6 +4129,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovzxbd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovzxbd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2428,6 +4143,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovzxbd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovzxbd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2435,6 +4157,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovzxbd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovzxbd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2442,6 +4171,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovzxbd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovzxbd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2449,6 +4185,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovzxbd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovzxbd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
@@ -2456,6 +4199,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovzxbd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovzxbd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
@@ -2486,6 +4236,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovzxbq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovzxbq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
@@ -2493,6 +4250,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovzxbq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovzxbq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2500,6 +4264,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovzxbq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovzxbq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2507,6 +4278,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovzxbq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovzxbq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2514,6 +4292,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovzxbq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovzxbq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2521,6 +4306,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovzxbq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovzxbq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
@@ -2528,6 +4320,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovzxbq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovzxbq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
@@ -2558,6 +4357,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovzxdq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovzxdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
@@ -2565,6 +4371,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovzxdq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovzxdq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2572,6 +4385,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovzxdq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovzxdq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2579,6 +4399,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovzxdq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovzxdq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2586,6 +4413,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovzxdq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovzxdq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2593,6 +4427,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovzxdq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovzxdq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
@@ -2600,6 +4441,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovzxdq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovzxdq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50]
@@ -2630,6 +4478,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovzxwd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovzxwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
@@ -2637,6 +4492,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovzxwd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovzxwd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2644,6 +4506,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovzxwd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovzxwd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2651,6 +4520,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovzxwd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovzxwd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2658,6 +4534,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovzxwd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovzxwd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2665,6 +4548,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovzxwd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovzxwd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
@@ -2672,6 +4562,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovzxwd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovzxwd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
@@ -2702,6 +4599,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmovzxwq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmovzxwq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
@@ -2709,6 +4613,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmovzxwq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmovzxwq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2716,6 +4627,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmovzxwq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmovzxwq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2723,6 +4641,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmovzxwq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmovzxwq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2730,6 +4655,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmovzxwq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmovzxwq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2737,6 +4669,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmovzxwq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmovzxwq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
@@ -2744,6 +4683,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmovzxwq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmovzxwq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
@@ -2771,42 +4717,84 @@ define <2 x i64> @test_pmuldq(<4 x i32>
 ; SLM-NEXT:    pmuldq (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmuldq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmuldq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmuldq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmuldq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmuldq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmuldq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmuldq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmuldq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmuldq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmuldq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmuldq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmuldq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmuldq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmuldq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2833,42 +4821,84 @@ define <4 x i32> @test_pmulld(<4 x i32>
 ; SLM-NEXT:    pmulld (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmulld:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmulld:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmulld:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [10:2.00]
+; HASWELL-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [16:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmulld:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
 ; HASWELL-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmulld:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [15:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmulld:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
 ; BROADWELL-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmulld:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [10:1.00]
+; SKYLAKE-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [16:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmulld:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
 ; SKYLAKE-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmulld:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [10:0.67]
+; SKX-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [16:0.67]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmulld:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67]
 ; SKX-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmulld:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmulld:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmulld:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmulld:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2901,6 +4931,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; SLM-NEXT:    movzbl %cl, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_ptest:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT:    setb %al # sched: [1:0.50]
+; SANDY-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT:    setb %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_ptest:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2911,6 +4951,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; SANDY-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_ptest:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT:    setb %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    setb %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_ptest:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2921,6 +4971,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; HASWELL-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_ptest:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT:    setb %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT:    setb %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_ptest:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2931,6 +4991,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; BROADWELL-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_ptest:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    setb %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT:    setb %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_ptest:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2941,6 +5011,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; SKYLAKE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_ptest:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-SSE-NEXT:    setb %al # sched: [1:0.50]
+; SKX-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT:    setb %cl # sched: [1:0.50]
+; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_ptest:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2951,6 +5031,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; SKX-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_ptest:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    setb %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    setb %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_ptest:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2961,6 +5051,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; BTVER2-NEXT:    movzbl %cl, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_ptest:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT:    setb %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT:    setb %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_ptest:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vptest %xmm1, %xmm0 # sched: [1:1.00]
@@ -2994,6 +5094,13 @@ define <2 x double> @test_roundpd(<2 x d
 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_roundpd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_roundpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3001,6 +5108,13 @@ define <2 x double> @test_roundpd(<2 x d
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_roundpd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_roundpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
@@ -3008,6 +5122,14 @@ define <2 x double> @test_roundpd(<2 x d
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_roundpd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    roundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT:    roundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_roundpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
@@ -3015,6 +5137,13 @@ define <2 x double> @test_roundpd(<2 x d
 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_roundpd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_roundpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
@@ -3022,6 +5151,13 @@ define <2 x double> @test_roundpd(<2 x d
 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_roundpd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_roundpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67]
@@ -3029,6 +5165,13 @@ define <2 x double> @test_roundpd(<2 x d
 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_roundpd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_roundpd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00]
@@ -3036,6 +5179,13 @@ define <2 x double> @test_roundpd(<2 x d
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_roundpd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_roundpd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00]
@@ -3066,6 +5216,13 @@ define <4 x float> @test_roundps(<4 x fl
 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_roundps:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_roundps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3073,6 +5230,13 @@ define <4 x float> @test_roundps(<4 x fl
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_roundps:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_roundps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
@@ -3080,6 +5244,14 @@ define <4 x float> @test_roundps(<4 x fl
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_roundps:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    roundps $7, (%rdi), %xmm1 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT:    roundps $7, %xmm0, %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_roundps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [11:2.00]
@@ -3087,6 +5259,13 @@ define <4 x float> @test_roundps(<4 x fl
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_roundps:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_roundps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
@@ -3094,6 +5273,13 @@ define <4 x float> @test_roundps(<4 x fl
 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_roundps:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_roundps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [8:0.67]
@@ -3101,6 +5287,13 @@ define <4 x float> @test_roundps(<4 x fl
 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_roundps:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_roundps:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [8:1.00]
@@ -3108,6 +5301,13 @@ define <4 x float> @test_roundps(<4 x fl
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_roundps:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_roundps:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [11:1.00]
@@ -3139,6 +5339,14 @@ define <2 x double> @test_roundsd(<2 x d
 ; SLM-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_roundsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_roundsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3146,6 +5354,14 @@ define <2 x double> @test_roundsd(<2 x d
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_roundsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_roundsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
@@ -3153,6 +5369,14 @@ define <2 x double> @test_roundsd(<2 x d
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_roundsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_roundsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
@@ -3160,6 +5384,14 @@ define <2 x double> @test_roundsd(<2 x d
 ; BROADWELL-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_roundsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_roundsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
@@ -3167,6 +5399,14 @@ define <2 x double> @test_roundsd(<2 x d
 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_roundsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [8:0.67]
+; SKX-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_roundsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
@@ -3174,6 +5414,14 @@ define <2 x double> @test_roundsd(<2 x d
 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_roundsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_roundsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3181,6 +5429,14 @@ define <2 x double> @test_roundsd(<2 x d
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_roundsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_roundsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
@@ -3212,6 +5468,14 @@ define <4 x float> @test_roundss(<4 x fl
 ; SLM-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_roundss:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
+; SANDY-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_roundss:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3219,6 +5483,14 @@ define <4 x float> @test_roundss(<4 x fl
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_roundss:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
+; HASWELL-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_roundss:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
@@ -3226,6 +5498,14 @@ define <4 x float> @test_roundss(<4 x fl
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_roundss:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_roundss:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
@@ -3233,6 +5513,14 @@ define <4 x float> @test_roundss(<4 x fl
 ; BROADWELL-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_roundss:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_roundss:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
@@ -3240,6 +5528,14 @@ define <4 x float> @test_roundss(<4 x fl
 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_roundss:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [8:0.67]
+; SKX-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_roundss:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
@@ -3247,6 +5543,14 @@ define <4 x float> @test_roundss(<4 x fl
 ; SKX-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_roundss:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_roundss:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3254,6 +5558,14 @@ define <4 x float> @test_roundss(<4 x fl
 ; BTVER2-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_roundss:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_roundss:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]

Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,14 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx  | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx  | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx  | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx  | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx  | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx  | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx  | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx  | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
 
 define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
 ; GENERIC-LABEL: crc32_32_8:
@@ -25,6 +33,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: crc32_32_8:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; SANDY-SSE-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: crc32_32_8:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -32,6 +47,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: crc32_32_8:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: crc32_32_8:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -39,6 +61,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: crc32_32_8:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: crc32_32_8:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -46,6 +75,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: crc32_32_8:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: crc32_32_8:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -53,6 +89,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: crc32_32_8:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; SKX-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: crc32_32_8:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -60,6 +103,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: crc32_32_8:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    crc32b %sil, %edi # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32b (%rdx), %edi # sched: [6:2.00]
+; BTVER2-SSE-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: crc32_32_8:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    crc32b %sil, %edi # sched: [3:2.00]
@@ -67,6 +117,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: crc32_32_8:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32b (%rdx), %edi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: crc32_32_8:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -95,6 +152,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: crc32_32_16:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32w (%rdx), %edi # sched: [7:1.00]
+; SANDY-SSE-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: crc32_32_16:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    crc32w %si, %edi # sched: [3:1.00]
@@ -102,6 +166,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: crc32_32_16:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: crc32_32_16:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    crc32w %si, %edi # sched: [3:1.00]
@@ -109,6 +180,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: crc32_32_16:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: crc32_32_16:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    crc32w %si, %edi # sched: [3:1.00]
@@ -116,6 +194,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: crc32_32_16:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: crc32_32_16:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
@@ -123,6 +208,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: crc32_32_16:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
+; SKX-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: crc32_32_16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    crc32w %si, %edi # sched: [3:1.00]
@@ -130,6 +222,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: crc32_32_16:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    crc32w %si, %edi # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32w (%rdx), %edi # sched: [6:2.00]
+; BTVER2-SSE-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: crc32_32_16:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    crc32w %si, %edi # sched: [3:2.00]
@@ -137,6 +236,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: crc32_32_16:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32w (%rdx), %edi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: crc32_32_16:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    crc32w %si, %edi # sched: [3:1.00]
@@ -165,6 +271,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: crc32_32_32:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32l (%rdx), %edi # sched: [7:1.00]
+; SANDY-SSE-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: crc32_32_32:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
@@ -172,6 +285,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: crc32_32_32:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: crc32_32_32:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
@@ -179,6 +299,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: crc32_32_32:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: crc32_32_32:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
@@ -186,6 +313,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: crc32_32_32:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: crc32_32_32:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
@@ -193,6 +327,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: crc32_32_32:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
+; SKX-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: crc32_32_32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
@@ -200,6 +341,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: crc32_32_32:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    crc32l %esi, %edi # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32l (%rdx), %edi # sched: [6:2.00]
+; BTVER2-SSE-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: crc32_32_32:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    crc32l %esi, %edi # sched: [3:2.00]
@@ -207,6 +355,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: crc32_32_32:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32l (%rdx), %edi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: crc32_32_32:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
@@ -235,6 +390,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: crc32_64_8:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; SANDY-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: crc32_64_8:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -242,6 +404,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: crc32_64_8:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: crc32_64_8:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -249,6 +418,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: crc32_64_8:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: crc32_64_8:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -256,6 +432,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; BROADWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: crc32_64_8:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: crc32_64_8:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -263,6 +446,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; SKYLAKE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: crc32_64_8:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; SKX-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: crc32_64_8:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -270,6 +460,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; SKX-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: crc32_64_8:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    crc32b %sil, %edi # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32b (%rdx), %edi # sched: [6:2.00]
+; BTVER2-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: crc32_64_8:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    crc32b %sil, %edi # sched: [3:2.00]
@@ -277,6 +474,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: crc32_64_8:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32b (%rdx), %edi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: crc32_64_8:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
@@ -305,6 +509,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: crc32_64_64:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
+; SANDY-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: crc32_64_64:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
@@ -312,6 +523,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: crc32_64_64:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
+; HASWELL-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: crc32_64_64:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
@@ -319,6 +537,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: crc32_64_64:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: crc32_64_64:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
@@ -326,6 +551,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; BROADWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: crc32_64_64:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: crc32_64_64:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
@@ -333,6 +565,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; SKYLAKE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: crc32_64_64:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
+; SKX-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: crc32_64_64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
@@ -340,6 +579,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; SKX-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: crc32_64_64:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [6:2.00]
+; BTVER2-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: crc32_64_64:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    crc32q %rsi, %rdi # sched: [3:2.00]
@@ -347,6 +593,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: crc32_64_64:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: crc32_64_64:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
@@ -387,6 +640,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; SLM-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpestri:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movl $7, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movl $7, %edx # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
+; SANDY-SSE-NEXT:    movl %ecx, %esi # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movl $7, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movl $7, %edx # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
+; SANDY-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; SANDY-SSE-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpestri:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    movl $7, %eax # sched: [1:0.33]
@@ -400,6 +666,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; SANDY-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpestri:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; HASWELL-SSE-NEXT:    movl %ecx, %esi # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
+; HASWELL-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; HASWELL-SSE-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpestri:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -413,6 +692,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; HASWELL-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpestri:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; BROADWELL-SSE-NEXT:    movl %ecx, %esi # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [23:4.00]
+; BROADWELL-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; BROADWELL-SSE-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpestri:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -426,6 +718,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; BROADWELL-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpestri:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; SKYLAKE-SSE-NEXT:    movl %ecx, %esi # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
+; SKYLAKE-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; SKYLAKE-SSE-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpestri:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -439,6 +744,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; SKYLAKE-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpestri:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; SKX-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; SKX-SSE-NEXT:    movl %ecx, %esi # sched: [1:0.25]
+; SKX-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; SKX-SSE-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
+; SKX-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; SKX-SSE-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpestri:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -452,6 +770,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; SKX-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpestri:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movl $7, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movl $7, %edx # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [14:5.00]
+; BTVER2-SSE-NEXT:    movl $7, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movl $7, %edx # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movl %ecx, %esi # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [19:5.00]
+; BTVER2-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; BTVER2-SSE-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpestri:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    movl $7, %eax # sched: [1:0.50]
@@ -465,6 +796,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; BTVER2-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpestri:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movl %ecx, %esi # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; ZNVER1-SSE-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpestri:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -506,6 +850,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; SLM-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [17:17.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpestrm:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    movl $7, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movl $7, %edx # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
+; SANDY-SSE-NEXT:    movl $7, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movl $7, %edx # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpestrm:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    movl $7, %eax # sched: [1:0.33]
@@ -516,6 +870,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; SANDY-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpestrm:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; HASWELL-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpestrm:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -526,6 +890,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; HASWELL-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpestrm:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; BROADWELL-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpestrm:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -536,6 +910,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; BROADWELL-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpestrm:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; SKYLAKE-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpestrm:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -546,6 +930,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; SKYLAKE-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpestrm:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; SKX-SSE-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; SKX-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; SKX-SSE-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpestrm:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -556,6 +950,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; SKX-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpestrm:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    movl $7, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movl $7, %edx # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [14:5.00]
+; BTVER2-SSE-NEXT:    movl $7, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movl $7, %edx # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpestrm:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    movl $7, %eax # sched: [1:0.50]
@@ -566,6 +970,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; BTVER2-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpestrm:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    movl $7, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movl $7, %edx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpestrm:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    movl $7, %eax # sched: [1:0.25]
@@ -601,6 +1015,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ; SLM-NEXT:    leal (%rcx,%rax), %eax # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpistri:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; SANDY-SSE-NEXT:    movl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
+; SANDY-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; SANDY-SSE-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpistri:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
@@ -610,6 +1033,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ; SANDY-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpistri:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; HASWELL-SSE-NEXT:    movl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
+; HASWELL-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; HASWELL-SSE-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpistri:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
@@ -619,6 +1051,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ; HASWELL-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpistri:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BROADWELL-SSE-NEXT:    movl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
+; BROADWELL-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; BROADWELL-SSE-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpistri:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
@@ -628,6 +1069,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ; BROADWELL-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpistri:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKYLAKE-SSE-NEXT:    movl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
+; SKYLAKE-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; SKYLAKE-SSE-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpistri:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
@@ -637,6 +1087,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ; SKYLAKE-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpistri:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKX-SSE-NEXT:    movl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
+; SKX-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; SKX-SSE-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpistri:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
@@ -646,6 +1105,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ; SKX-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpistri:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
+; BTVER2-SSE-NEXT:    movl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [12:2.00]
+; BTVER2-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; BTVER2-SSE-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpistri:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
@@ -655,6 +1123,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ; BTVER2-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpistri:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    movl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; ZNVER1-SSE-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpistri:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [100:?]
@@ -684,42 +1161,84 @@ define <16 x i8> @test_pcmpistrm(<16 x i
 ; SLM-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [13:13.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpistrm:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; SANDY-SSE-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpistrm:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
 ; SANDY-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpistrm:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; HASWELL-SSE-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpistrm:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
 ; HASWELL-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpistrm:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BROADWELL-SSE-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpistrm:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
 ; BROADWELL-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpistrm:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKYLAKE-SSE-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpistrm:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
 ; SKYLAKE-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpistrm:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKX-SSE-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpistrm:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
 ; SKX-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpistrm:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
+; BTVER2-SSE-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpistrm:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
 ; BTVER2-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpistrm:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpistrm:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [100:?]
@@ -745,42 +1264,84 @@ define <2 x i64> @test_pcmpgtq(<2 x i64>
 ; SLM-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pcmpgtq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pcmpgtq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pcmpgtq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pcmpgtq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pcmpgtq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pcmpgtq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pcmpgtq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pcmpgtq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pcmpgtq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-SSE-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pcmpgtq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SKX-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pcmpgtq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pcmpgtq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pcmpgtq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
+; ZNVER1-SSE-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pcmpgtq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -807,42 +1368,84 @@ define <2 x i64> @test_pclmulqdq(<2 x i6
 ; SLM-NEXT:    pclmulqdq $0, (%rdi), %xmm0 # sched: [10:10.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pclmulqdq:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00]
+; SANDY-SSE-NEXT:    pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pclmulqdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00]
 ; SANDY-NEXT:    vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pclmulqdq:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pclmulqdq $0, %xmm1, %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT:    pclmulqdq $0, (%rdi), %xmm0 # sched: [17:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pclmulqdq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00]
 ; HASWELL-NEXT:    vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [17:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pclmulqdq:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pclmulqdq $0, %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pclmulqdq $0, (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pclmulqdq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pclmulqdq:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT:    pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pclmulqdq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pclmulqdq:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT:    pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pclmulqdq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
 ; SKX-NEXT:    vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pclmulqdq:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pclmulqdq $0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pclmulqdq $0, (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pclmulqdq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pclmulqdq:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pclmulqdq $0, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    pclmulqdq $0, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pclmulqdq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [100:?]




More information about the llvm-commits mailing list