[llvm] r353043 - [AsmPrinter] Remove hidden flag -print-schedule.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 4 04:51:26 PST 2019


Removed: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=353042&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (removed)
@@ -1,16972 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_addpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    addpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    addpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fadd <2 x double> %a0, %a1
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = fadd <2 x double> %1, %2
-  ret <2 x double> %3
-}
-
-define double @test_addsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_addsd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    addsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addsd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    addsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    addsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addsd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    addsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addsd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addsd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addsd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addsd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addsd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addsd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addsd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addsd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addsd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addsd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addsd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addsd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addsd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addsd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addsd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addsd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fadd double %a0, %a1
-  %2 = load double, double *%a2, align 8
-  %3 = fadd double %1, %2
-  ret double %3
-}
-
-define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_andpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_andpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    andpd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_andpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    andpd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_andpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_andpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_andpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_andpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_andpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_andpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_andpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_andpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_andpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_andpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_andpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_andpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = bitcast <2 x double> %a0 to <4 x i32>
-  %2 = bitcast <2 x double> %a1 to <4 x i32>
-  %3 = and <4 x i32> %1, %2
-  %4 = load <2 x double>, <2 x double> *%a2, align 16
-  %5 = bitcast <2 x double> %4 to <4 x i32>
-  %6 = and <4 x i32> %3, %5
-  %7 = bitcast <4 x i32> %6 to <2 x double>
-  %8 = fadd <2 x double> %a1, %7
-  ret <2 x double> %8
-}
-
-define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_andnotpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_andnotpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    andnpd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_andnotpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    andnpd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_andnotpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andnotpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_andnotpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_andnotpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_andnotpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andnotpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_andnotpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andnotpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_andnotpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andnotpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_andnotpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_andnotpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_andnotpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_andnotpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_andnotpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_andnotpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = bitcast <2 x double> %a0 to <4 x i32>
-  %2 = bitcast <2 x double> %a1 to <4 x i32>
-  %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
-  %4 = and <4 x i32> %3, %2
-  %5 = load <2 x double>, <2 x double> *%a2, align 16
-  %6 = bitcast <2 x double> %5 to <4 x i32>
-  %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
-  %8 = and <4 x i32> %6, %7
-  %9 = bitcast <4 x i32> %8 to <2 x double>
-  %10 = fadd <2 x double> %a1, %9
-  ret <2 x double> %10
-}
-
-define void @test_clflush(i8* %p){
-; GENERIC-LABEL: test_clflush:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    clflush (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_clflush:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    clflush (%rdi) # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_clflush:
-; SLM:       # %bb.0:
-; SLM-NEXT:    clflush (%rdi) # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_clflush:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    clflush (%rdi) # sched: [5:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_clflush:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    clflush (%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_clflush:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_clflush:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    clflush (%rdi) # sched: [2:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_clflush:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_clflush:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    clflush (%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_clflush:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_clflush:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    clflush (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_clflush:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_clflush:
-; SKX:       # %bb.0:
-; SKX-NEXT:    clflush (%rdi) # sched: [2:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_clflush:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    clflush (%rdi) # sched: [5:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_clflush:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    clflush (%rdi) # sched: [5:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_clflush:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    clflush (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_clflush:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    clflush (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_clflush:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    clflush (%rdi) # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_clflush:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    clflush (%rdi) # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  tail call void @llvm.x86.sse2.clflush(i8* %p)
-  ret void
-}
-declare void @llvm.x86.sse2.clflush(i8*) nounwind
-
-define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_cmppd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmppd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmppd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cmppd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cmppd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cmppd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cmppd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cmppd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmppd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cmppd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmppd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cmppd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmppd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cmppd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cmppd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
-; BDVER2-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cmppd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cmppd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cmppd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cmppd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fcmp oeq <2 x double> %a0, %a1
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = fcmp oeq <2 x double> %a0, %2
-  %4 = or <2 x i1> %1, %3
-  %5 = sext <2 x i1> %4 to <2 x i64>
-  %6 = bitcast <2 x i64> %5 to <2 x double>
-  ret <2 x double> %6
-}
-
-define double @test_cmpsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_cmpsd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpsd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpsd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cmpsd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cmpsd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cmpsd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cmpsd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cmpsd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpsd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cmpsd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpsd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cmpsd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpsd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cmpsd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cmpsd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cmpsd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cmpsd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cmpsd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cmpsd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = insertelement <2 x double> undef, double %a0, i32 0
-  %2 = insertelement <2 x double> undef, double %a1, i32 0
-  %3 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %2, i8 0)
-  %4 = load double, double *%a2, align 8
-  %5 = insertelement <2 x double> undef, double %4, i32 0
-  %6 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %3, <2 x double> %5, i8 0)
-  %7 = extractelement <2 x double> %6, i32 0
-  ret double %7
-}
-declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
-
-define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_comisd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
-; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
-; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
-; GENERIC-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
-; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
-; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
-; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
-; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_comisd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    comisd %xmm1, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT:    setnp %al # sched: [1:0.50]
-; ATOM-NEXT:    sete %cl # sched: [1:0.50]
-; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
-; ATOM-NEXT:    comisd (%rdi), %xmm0 # sched: [10:5.00]
-; ATOM-NEXT:    setnp %al # sched: [1:0.50]
-; ATOM-NEXT:    sete %dl # sched: [1:0.50]
-; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
-; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_comisd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    setnp %al # sched: [1:0.50]
-; SLM-NEXT:    sete %cl # sched: [1:0.50]
-; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
-; SLM-NEXT:    comisd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    setnp %al # sched: [1:0.50]
-; SLM-NEXT:    sete %dl # sched: [1:0.50]
-; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
-; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_comisd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
-; SANDY-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_comisd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.50]
-; SANDY-NEXT:    sete %cl # sched: [1:0.50]
-; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
-; SANDY-NEXT:    vcomisd (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.50]
-; SANDY-NEXT:    sete %dl # sched: [1:0.50]
-; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
-; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
-; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_comisd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_comisd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
-; HASWELL-NEXT:    sete %cl # sched: [1:0.50]
-; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
-; HASWELL-NEXT:    vcomisd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
-; HASWELL-NEXT:    sete %dl # sched: [1:0.50]
-; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
-; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_comisd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_comisd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
-; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT:    vcomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
-; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
-; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_comisd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_comisd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT:    sete %cl # sched: [1:0.50]
-; SKYLAKE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-NEXT:    vcomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT:    sete %dl # sched: [1:0.50]
-; SKYLAKE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_comisd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; SKX-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_comisd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    setnp %al # sched: [1:0.50]
-; SKX-NEXT:    sete %cl # sched: [1:0.50]
-; SKX-NEXT:    andb %al, %cl # sched: [1:0.25]
-; SKX-NEXT:    vcomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    setnp %al # sched: [1:0.50]
-; SKX-NEXT:    sete %dl # sched: [1:0.50]
-; SKX-NEXT:    andb %al, %dl # sched: [1:0.25]
-; SKX-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_comisd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_comisd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcomisd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT:    setnp %al # sched: [1:0.50]
-; BDVER2-NEXT:    sete %cl # sched: [1:0.50]
-; BDVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
-; BDVER2-NEXT:    vcomisd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT:    setnp %al # sched: [1:0.50]
-; BDVER2-NEXT:    sete %dl # sched: [1:0.50]
-; BDVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
-; BDVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; BDVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_comisd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_comisd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
-; BTVER2-NEXT:    sete %cl # sched: [1:0.50]
-; BTVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
-; BTVER2-NEXT:    vcomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
-; BTVER2-NEXT:    sete %dl # sched: [1:0.50]
-; BTVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
-; BTVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_comisd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_comisd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT:    sete %cl # sched: [1:0.25]
-; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
-; ZNVER1-NEXT:    vcomisd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT:    sete %dl # sched: [1:0.25]
-; ZNVER1-NEXT:    andb %al, %dl # sched: [1:0.25]
-; ZNVER1-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 8
-  %3 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %2)
-  %4 = or i32 %1, %3
-  ret i32 %4
-}
-declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_cvtdq2pd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtdq2pd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtdq2pd (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT:    cvtdq2pd %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    addpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtdq2pd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtdq2pd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtdq2pd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtdq2pd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtdq2pd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtdq2pd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtdq2pd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtdq2pd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtdq2pd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtdq2pd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtdq2pd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtdq2pd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtdq2pd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtdq2pd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtdq2pd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtdq2pd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtdq2pd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-  %2 = sitofp <2 x i32> %1 to <2 x double>
-  %3 = load <4 x i32>, <4 x i32>*%a1, align 16
-  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-  %5 = sitofp <2 x i32> %4 to <2 x double>
-  %6 = fadd <2 x double> %2, %5
-  ret <2 x double> %6
-}
-
-define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_cvtdq2ps:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtdq2ps:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtdq2ps (%rdi), %xmm1 # sched: [7:3.50]
-; ATOM-NEXT:    cvtdq2ps %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtdq2ps:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtdq2ps:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtdq2ps:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtdq2ps:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtdq2ps:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtdq2ps:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtdq2ps:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtdq2ps:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtdq2ps:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
-; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtdq2ps:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtdq2ps:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
-; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtdq2ps:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtdq2ps:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtdq2ps:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtdq2ps:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtdq2ps:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtdq2ps:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = sitofp <4 x i32> %a0 to <4 x float>
-  %2 = load <4 x i32>, <4 x i32>*%a1, align 16
-  %3 = sitofp <4 x i32> %2 to <4 x float>
-  %4 = fadd <4 x float> %1, %3
-  ret <4 x float> %4
-}
-
-define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_cvtpd2dq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtpd2dq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtpd2dq (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT:    cvtpd2dq %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtpd2dq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtpd2dq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtpd2dq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtpd2dq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtpd2dq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtpd2dq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpd2dq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtpd2dq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpd2dq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtpd2dq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpd2dq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtpd2dq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtpd2dq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtpd2dq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtpd2dq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtpd2dq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtpd2dq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
-  %2 = load <2 x double>, <2 x double> *%a1, align 16
-  %3 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %2)
-  %4 = add <4 x i32> %1, %3
-  ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
-
-define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_cvtpd2ps:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtpd2ps:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtpd2ps (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT:    cvtpd2ps %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtpd2ps:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtpd2ps:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtpd2ps:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtpd2ps:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
-; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtpd2ps:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtpd2ps:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpd2ps:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtpd2ps:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpd2ps:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtpd2ps:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpd2ps:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtpd2ps:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtpd2ps:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtpd2ps:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtpd2ps:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtpd2ps:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtpd2ps:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00]
-; ZNVER1-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
-  %2 = load <2 x double>, <2 x double> *%a1, align 16
-  %3 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %2)
-  %4 = fadd <4 x float> %1, %3
-  ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
-
-define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_cvtps2dq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtps2dq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtps2dq (%rdi), %xmm1 # sched: [7:3.50]
-; ATOM-NEXT:    cvtps2dq %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtps2dq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtps2dq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtps2dq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtps2dq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtps2dq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtps2dq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtps2dq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtps2dq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtps2dq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtps2dq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtps2dq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtps2dq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtps2dq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtps2dq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtps2dq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtps2dq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtps2dq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
-  %2 = load <4 x float>, <4 x float> *%a1, align 16
-  %3 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %2)
-  %4 = add <4 x i32> %1, %3
-  ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
-
-define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_cvtps2pd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; GENERIC-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtps2pd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtps2pd (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT:    cvtps2pd %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    addpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtps2pd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtps2pd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtps2pd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtps2pd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtps2pd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtps2pd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtps2pd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtps2pd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtps2pd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtps2pd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtps2pd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtps2pd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtps2pd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtps2pd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtps2pd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtps2pd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtps2pd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00]
-; ZNVER1-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
-  %2 = fpext <2 x float> %1 to <2 x double>
-  %3 = load <4 x float>, <4 x float> *%a1, align 16
-  %4 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 0, i32 1>
-  %5 = fpext <2 x float> %4 to <2 x double>
-  %6 = fadd <2 x double> %2, %5
-  ret <2 x double> %6
-}
-
-define i32 @test_cvtsd2si(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvtsd2si:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; GENERIC-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
-; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsd2si:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:4.50]
-; ATOM-NEXT:    cvtsd2si %xmm0, %ecx # sched: [8:4.00]
-; ATOM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsd2si:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtsd2si (%rdi), %eax # sched: [7:1.00]
-; SLM-NEXT:    cvtsd2si %xmm0, %ecx # sched: [4:0.50]
-; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsd2si:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
-; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsd2si:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-NEXT:    vcvtsd2si (%rdi), %eax # sched: [10:1.00]
-; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsd2si:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsd2si:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtsd2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsd2si:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsd2si:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtsd2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsd2si:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsd2si:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-NEXT:    vcvtsd2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsd2si:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [11:1.00]
-; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsd2si:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-NEXT:    vcvtsd2si (%rdi), %eax # sched: [11:1.00]
-; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsd2si:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsd2si:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtsd2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsd2si:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsd2si:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtsd2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsd2si:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsd2si:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtsd2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = insertelement <2 x double> undef, double %a0, i32 0
-  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %1)
-  %3 = load double, double *%a1, align 8
-  %4 = insertelement <2 x double> undef, double %3, i32 0
-  %5 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %4)
-  %6 = add i32 %2, %5
-  ret i32 %6
-}
-declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
-
-define i64 @test_cvtsd2siq(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvtsd2siq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; GENERIC-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
-; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsd2siq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:4.50]
-; ATOM-NEXT:    cvtsd2si %xmm0, %rcx # sched: [8:4.00]
-; ATOM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsd2siq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtsd2si (%rdi), %rax # sched: [7:1.00]
-; SLM-NEXT:    cvtsd2si %xmm0, %rcx # sched: [4:0.50]
-; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsd2siq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
-; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsd2siq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-NEXT:    vcvtsd2si (%rdi), %rax # sched: [10:1.00]
-; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsd2siq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsd2siq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtsd2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsd2siq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsd2siq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtsd2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsd2siq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsd2siq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-NEXT:    vcvtsd2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsd2siq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKX-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [11:1.00]
-; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsd2siq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKX-NEXT:    vcvtsd2si (%rdi), %rax # sched: [11:1.00]
-; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsd2siq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsd2siq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtsd2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsd2siq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsd2siq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtsd2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsd2siq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsd2siq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtsd2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = insertelement <2 x double> undef, double %a0, i32 0
-  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %1)
-  %3 = load double, double *%a1, align 8
-  %4 = insertelement <2 x double> undef, double %3, i32 0
-  %5 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %4)
-  %6 = add i64 %2, %5
-  ret i64 %6
-}
-declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-
-define float @test_cvtsd2ss(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvtsd2ss:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsd2ss:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
-; ATOM-NEXT:    cvtsd2ss %xmm0, %xmm2 # sched: [6:3.00]
-; ATOM-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    cvtsd2ss %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    addss %xmm2, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsd2ss:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
-; SLM-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:0.50]
-; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsd2ss:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; SANDY-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsd2ss:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsd2ss:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; HASWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsd2ss:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; HASWELL-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsd2ss:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsd2ss:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BROADWELL-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsd2ss:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsd2ss:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsd2ss:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsd2ss:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsd2ss:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BDVER2-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsd2ss:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BDVER2-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; BDVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsd2ss:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00]
-; BTVER2-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsd2ss:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [7:2.00]
-; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsd2ss:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsd2ss:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
-; ZNVER1-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fptrunc double %a0 to float
-  %2 = load double, double *%a1, align 8
-  %3 = fptrunc double %2 to float
-  %4 = fadd float %1, %3
-  ret float %4
-}
-
-define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_cvtsi2sd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsi2sd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT:    addsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsi2sd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsi2sd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsi2sd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsi2sd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsi2sd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsi2sd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsi2sd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsi2sd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsi2sd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsi2sd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsi2sd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsi2sd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [14:1.00]
-; BDVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsi2sd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsi2sd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [10:1.00]
-; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsi2sd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [10:1.00]
-; BTVER2-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsi2sd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsi2sd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = sitofp i32 %a0 to double
-  %2 = load i32, i32 *%a1, align 8
-  %3 = sitofp i32 %2 to double
-  %4 = fadd double %1, %3
-  ret double %4
-}
-
-define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_cvtsi2sdq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsi2sdq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT:    addsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsi2sdq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsi2sdq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsi2sdq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsi2sdq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsi2sdq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsi2sdq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsi2sdq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsi2sdq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsi2sdq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsi2sdq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsi2sdq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsi2sdq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsi2sdq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsi2sdq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [10:1.00]
-; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsi2sdq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [10:1.00]
-; BTVER2-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsi2sdq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsi2sdq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = sitofp i64 %a0 to double
-  %2 = load i64, i64 *%a1, align 8
-  %3 = sitofp i64 %2 to double
-  %4 = fadd double %1, %3
-  ret double %4
-}
-
-; TODO - cvtss2sd_m
-
-define double @test_cvtss2sd(float %a0, float *%a1) {
-; GENERIC-LABEL: test_cvtss2sd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtss2sd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
-; ATOM-NEXT:    cvtss2sd %xmm0, %xmm2 # sched: [6:3.00]
-; ATOM-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    cvtss2sd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    addsd %xmm2, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtss2sd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
-; SLM-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [4:0.50]
-; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtss2sd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtss2sd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtss2sd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtss2sd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
-; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtss2sd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtss2sd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
-; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtss2sd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtss2sd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtss2sd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtss2sd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtss2sd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtss2sd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; BDVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtss2sd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [7:2.00]
-; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtss2sd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [7:2.00]
-; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtss2sd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtss2sd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fpext float %a0 to double
-  %2 = load float, float *%a1, align 4
-  %3 = fpext float %2 to double
-  %4 = fadd double %1, %3
-  ret double %4
-}
-
-define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_cvttpd2dq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttpd2dq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvttpd2dq (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT:    cvttpd2dq %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttpd2dq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttpd2dq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttpd2dq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttpd2dq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttpd2dq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttpd2dq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttpd2dq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttpd2dq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttpd2dq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttpd2dq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttpd2dq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttpd2dq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttpd2dq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttpd2dq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttpd2dq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttpd2dq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttpd2dq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fptosi <2 x double> %a0 to <2 x i32>
-  %2 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %3 = load <2 x double>, <2 x double> *%a1, align 16
-  %4 = fptosi <2 x double> %3 to <2 x i32>
-  %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %6 = add <4 x i32> %2, %5
-  ret <4 x i32> %6
-}
-
-define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_cvttps2dq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttps2dq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvttps2dq (%rdi), %xmm1 # sched: [7:3.50]
-; ATOM-NEXT:    cvttps2dq %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttps2dq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttps2dq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttps2dq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttps2dq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttps2dq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttps2dq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttps2dq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttps2dq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttps2dq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttps2dq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttps2dq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttps2dq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttps2dq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttps2dq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttps2dq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttps2dq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttps2dq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fptosi <4 x float> %a0 to <4 x i32>
-  %2 = load <4 x float>, <4 x float> *%a1, align 16
-  %3 = fptosi <4 x float> %2 to <4 x i32>
-  %4 = add <4 x i32> %1, %3
-  ret <4 x i32> %4
-}
-
-define i32 @test_cvttsd2si(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvttsd2si:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; GENERIC-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
-; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttsd2si:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:4.50]
-; ATOM-NEXT:    cvttsd2si %xmm0, %ecx # sched: [8:4.00]
-; ATOM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttsd2si:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvttsd2si (%rdi), %eax # sched: [7:1.00]
-; SLM-NEXT:    cvttsd2si %xmm0, %ecx # sched: [4:0.50]
-; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttsd2si:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
-; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttsd2si:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-NEXT:    vcvttsd2si (%rdi), %eax # sched: [10:1.00]
-; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttsd2si:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttsd2si:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvttsd2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttsd2si:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttsd2si:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvttsd2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttsd2si:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttsd2si:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-NEXT:    vcvttsd2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttsd2si:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [11:1.00]
-; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttsd2si:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-NEXT:    vcvttsd2si (%rdi), %eax # sched: [11:1.00]
-; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttsd2si:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttsd2si:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvttsd2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttsd2si:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttsd2si:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvttsd2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttsd2si:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttsd2si:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvttsd2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fptosi double %a0 to i32
-  %2 = load double, double *%a1, align 8
-  %3 = fptosi double %2 to i32
-  %4 = add i32 %1, %3
-  ret i32 %4
-}
-
-define i64 @test_cvttsd2siq(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvttsd2siq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; GENERIC-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
-; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttsd2siq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:4.50]
-; ATOM-NEXT:    cvttsd2si %xmm0, %rcx # sched: [8:4.00]
-; ATOM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttsd2siq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    cvttsd2si (%rdi), %rax # sched: [7:1.00]
-; SLM-NEXT:    cvttsd2si %xmm0, %rcx # sched: [4:0.50]
-; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttsd2siq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
-; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttsd2siq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-NEXT:    vcvttsd2si (%rdi), %rax # sched: [10:1.00]
-; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttsd2siq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttsd2siq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvttsd2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttsd2siq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttsd2siq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vcvttsd2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttsd2siq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttsd2siq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-NEXT:    vcvttsd2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttsd2siq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKX-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [11:1.00]
-; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttsd2siq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKX-NEXT:    vcvttsd2si (%rdi), %rax # sched: [11:1.00]
-; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttsd2siq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttsd2siq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vcvttsd2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttsd2siq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttsd2siq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vcvttsd2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttsd2siq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttsd2siq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vcvttsd2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fptosi double %a0 to i64
-  %2 = load double, double *%a1, align 8
-  %3 = fptosi double %2 to i64
-  %4 = add i64 %1, %3
-  ret i64 %4
-}
-
-define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_divpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    divpd %xmm1, %xmm0 # sched: [22:22.00]
-; GENERIC-NEXT:    divpd (%rdi), %xmm0 # sched: [28:22.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_divpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    divpd %xmm1, %xmm0 # sched: [125:62.50]
-; ATOM-NEXT:    divpd (%rdi), %xmm0 # sched: [125:62.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_divpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    divpd %xmm1, %xmm0 # sched: [69:69.00]
-; SLM-NEXT:    divpd (%rdi), %xmm0 # sched: [72:69.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_divpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [22:22.00]
-; SANDY-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [28:22.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_divpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
-; SANDY-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_divpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [20:14.00]
-; HASWELL-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [26:14.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_divpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:14.00]
-; HASWELL-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:14.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_divpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:8.00]
-; BROADWELL-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [19:8.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_divpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:8.00]
-; BROADWELL-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:8.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_divpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:3.00]
-; SKYLAKE-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [20:4.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_divpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
-; SKYLAKE-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_divpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:3.00]
-; SKX-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [20:4.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_divpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
-; SKX-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_divpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [14:9.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_divpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:9.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_divpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [24:19.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_divpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_divpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [15:1.00]
-; ZNVER1-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [22:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_divpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
-; ZNVER1-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fdiv <2 x double> %a0, %a1
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = fdiv <2 x double> %1, %2
-  ret <2 x double> %3
-}
-
-define double @test_divsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_divsd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    divsd %xmm1, %xmm0 # sched: [22:22.00]
-; GENERIC-NEXT:    divsd (%rdi), %xmm0 # sched: [28:22.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_divsd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    divsd %xmm1, %xmm0 # sched: [62:31.00]
-; ATOM-NEXT:    divsd (%rdi), %xmm0 # sched: [62:31.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_divsd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    divsd %xmm1, %xmm0 # sched: [34:32.00]
-; SLM-NEXT:    divsd (%rdi), %xmm0 # sched: [37:32.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_divsd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [22:22.00]
-; SANDY-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [28:22.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_divsd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
-; SANDY-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_divsd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [20:14.00]
-; HASWELL-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [25:14.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_divsd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:14.00]
-; HASWELL-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:14.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_divsd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:4.00]
-; BROADWELL-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:8.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_divsd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:4.00]
-; BROADWELL-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:8.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_divsd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:3.00]
-; SKYLAKE-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:4.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_divsd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
-; SKYLAKE-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_divsd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:3.00]
-; SKX-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:4.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_divsd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
-; SKX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_divsd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [14:9.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_divsd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:9.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_divsd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [24:19.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_divsd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_divsd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [15:1.00]
-; ZNVER1-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [22:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_divsd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
-; ZNVER1-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fdiv double %a0, %a1
-  %2 = load double, double *%a2, align 8
-  %3 = fdiv double %1, %2
-  ret double %3
-}
-
-define void @test_lfence() {
-; GENERIC-LABEL: test_lfence:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    lfence # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lfence:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    lfence # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lfence:
-; SLM:       # %bb.0:
-; SLM-NEXT:    lfence # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_lfence:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    lfence # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_lfence:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    lfence # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_lfence:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    lfence # sched: [2:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_lfence:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    lfence # sched: [2:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_lfence:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    lfence # sched: [2:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lfence:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    lfence # sched: [2:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_lfence:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    lfence # sched: [2:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lfence:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    lfence # sched: [2:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_lfence:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    lfence # sched: [2:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_lfence:
-; SKX:       # %bb.0:
-; SKX-NEXT:    lfence # sched: [2:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_lfence:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    lfence # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_lfence:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    lfence # sched: [1:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_lfence:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    lfence # sched: [1:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_lfence:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    lfence # sched: [1:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_lfence:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    lfence # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_lfence:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    lfence # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  call void @llvm.x86.sse2.lfence()
-  ret void
-}
-declare void @llvm.x86.sse2.lfence() nounwind readnone
-
-define void @test_mfence() {
-; GENERIC-LABEL: test_mfence:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    mfence # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mfence:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    mfence # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mfence:
-; SLM:       # %bb.0:
-; SLM-NEXT:    mfence # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mfence:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    mfence # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mfence:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    mfence # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mfence:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    mfence # sched: [2:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mfence:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    mfence # sched: [2:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mfence:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    mfence # sched: [2:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mfence:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    mfence # sched: [2:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mfence:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    mfence # sched: [3:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mfence:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    mfence # sched: [3:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mfence:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    mfence # sched: [3:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mfence:
-; SKX:       # %bb.0:
-; SKX-NEXT:    mfence # sched: [3:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mfence:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    mfence # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mfence:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    mfence # sched: [1:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mfence:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    mfence # sched: [1:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mfence:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    mfence # sched: [1:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mfence:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    mfence # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mfence:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    mfence # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  call void @llvm.x86.sse2.mfence()
-  ret void
-}
-declare void @llvm.x86.sse2.mfence() nounwind readnone
-
-define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
-; GENERIC-LABEL: test_maskmovdqu:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maskmovdqu:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maskmovdqu:
-; SLM:       # %bb.0:
-; SLM-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_maskmovdqu:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maskmovdqu:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_maskmovdqu:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_maskmovdqu:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_maskmovdqu:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maskmovdqu:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_maskmovdqu:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maskmovdqu:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_maskmovdqu:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maskmovdqu:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_maskmovdqu:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_maskmovdqu:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_maskmovdqu:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_maskmovdqu:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_maskmovdqu:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_maskmovdqu:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
-  ret void
-}
-declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
-
-define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_maxpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maxpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    maxpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    maxpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maxpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    maxpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_maxpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maxpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_maxpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_maxpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_maxpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maxpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_maxpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maxpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_maxpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maxpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_maxpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_maxpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_maxpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_maxpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_maxpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_maxpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %1, <2 x double> %2)
-  ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_maxsd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maxsd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    maxsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    maxsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maxsd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    maxsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_maxsd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maxsd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_maxsd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_maxsd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_maxsd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maxsd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_maxsd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maxsd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_maxsd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maxsd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_maxsd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_maxsd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_maxsd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_maxsd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_maxsd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_maxsd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2)
-  ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_minpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_minpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    minpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    minpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_minpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    minpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_minpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_minpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_minpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_minpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_minpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_minpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_minpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_minpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_minpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_minpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_minpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_minpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_minpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_minpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_minpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_minpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %1, <2 x double> %2)
-  ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_minsd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    minsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_minsd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    minsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    minsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_minsd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    minsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_minsd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_minsd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_minsd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_minsd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_minsd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_minsd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_minsd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_minsd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_minsd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_minsd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_minsd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_minsd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_minsd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_minsd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_minsd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_minsd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2)
-  ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
-
-define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_movapd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movapd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movapd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movapd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movapd (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movapd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movapd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movapd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movapd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movapd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movapd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovapd (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movapd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movapd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movapd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movapd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movapd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movapd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovapd (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movapd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movapd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovapd (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movapd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movapd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovapd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load <2 x double>, <2 x double> *%a0, align 16
-  %2 = fadd <2 x double> %1, %1
-  store <2 x double> %2, <2 x double> *%a1, align 16
-  ret void
-}
-
-define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_movdqa:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movdqa:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movdqa (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm0, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movdqa:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movdqa (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movdqa:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movdqa:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movdqa:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movdqa:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movdqa:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movdqa:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movdqa:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movdqa:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movdqa:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movdqa:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movdqa:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movdqa:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movdqa:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movdqa:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movdqa:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movdqa:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load <2 x i64>, <2 x i64> *%a0, align 16
-  %2 = add <2 x i64> %1, %1
-  store <2 x i64> %2, <2 x i64> *%a1, align 16
-  ret void
-}
-
-define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_movdqu:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movdqu:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movdqu (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    paddq %xmm0, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    movdqu %xmm0, (%rsi) # sched: [2:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movdqu:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movdqu (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movdqu:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movdqu:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movdqu:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movdqu:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movdqu:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movdqu:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movdqu:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movdqu:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movdqu:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movdqu:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movdqu:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movdqu:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movdqu:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movdqu:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movdqu:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movdqu:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load <2 x i64>, <2 x i64> *%a0, align 1
-  %2 = add <2 x i64> %1, %1
-  store <2 x i64> %2, <2 x i64> *%a1, align 1
-  ret void
-}
-
-define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_movd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
-; GENERIC-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
-; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movd %xmm1, %eax # sched: [3:3.00]
-; ATOM-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [3:1.00]
-; SLM-NEXT:    movd %edi, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
-; SLM-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    movd %xmm2, %eax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
-; SANDY-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
-; SANDY-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
-; SANDY-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    movd %xmm2, %eax # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
-; HASWELL-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vmovd %xmm0, %eax # sched: [1:1.00]
-; HASWELL-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movd %xmm2, %eax # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
-; BROADWELL-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vmovd %xmm0, %eax # sched: [1:1.00]
-; BROADWELL-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
-; SKYLAKE-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
-; SKX-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
-; SKX-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-SSE-NEXT:    movd %edi, %xmm1 # sched: [10:0.50]
-; BDVER2-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movd %xmm2, %eax # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovd %edi, %xmm1 # sched: [10:0.50]
-; BDVER2-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vmovd %xmm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT:    vmovd %xmm1, (%rsi) # sched: [2:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-SSE-NEXT:    movd %edi, %xmm1 # sched: [8:0.50]
-; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movd %xmm2, %eax # sched: [4:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovd %edi, %xmm1 # sched: [8:0.50]
-; BTVER2-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovd %xmm0, %eax # sched: [4:1.00]
-; BTVER2-NEXT:    vmovd %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    movd %edi, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT:    vmovd %edi, %xmm1 # sched: [3:1.00]
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = insertelement <4 x i32> undef, i32 %a1, i32 0
-  %2 = load i32, i32 *%a2
-  %3 = insertelement <4 x i32> undef, i32 %2, i32 0
-  %4 = add <4 x i32> %a0, %1
-  %5 = add <4 x i32> %a0, %3
-  %6 = extractelement <4 x i32> %4, i32 0
-  %7 = extractelement <4 x i32> %5, i32 0
-  store i32 %6, i32* %a2
-  ret i32 %7
-}
-
-define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_movd_64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
-; GENERIC-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movd_64:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
-; ATOM-NEXT:    movq %rdi, %xmm2 # sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm0, %xmm1 # sched: [2:1.00]
-; ATOM-NEXT:    paddq %xmm0, %xmm2 # sched: [2:1.00]
-; ATOM-NEXT:    movq %xmm1, %rax # sched: [3:3.00]
-; ATOM-NEXT:    movq %xmm2, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movd_64:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [3:1.00]
-; SLM-NEXT:    movq %rdi, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
-; SLM-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    movq %xmm2, %rax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movd_64:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
-; SANDY-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movd_64:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
-; SANDY-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
-; SANDY-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movd_64:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    movq %xmm2, %rax # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movd_64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
-; HASWELL-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vmovq %xmm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movd_64:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movq %xmm2, %rax # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movd_64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
-; BROADWELL-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vmovq %xmm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movd_64:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movd_64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
-; SKYLAKE-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movd_64:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
-; SKX-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movd_64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
-; SKX-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
-; SKX-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movd_64:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; BDVER2-SSE-NEXT:    movq %rdi, %xmm1 # sched: [10:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movq %xmm2, %rax # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movd_64:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovq %rdi, %xmm1 # sched: [10:0.50]
-; BDVER2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vmovq %xmm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT:    vmovq %xmm1, (%rsi) # sched: [2:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movd_64:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT:    movq %rdi, %xmm1 # sched: [8:0.50]
-; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movq %xmm2, %rax # sched: [4:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movd_64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovq %rdi, %xmm1 # sched: [8:0.50]
-; BTVER2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovq %xmm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT:    vmovq %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movd_64:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    movq %rdi, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movd_64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
-; ZNVER1-NEXT:    vmovq %rdi, %xmm1 # sched: [3:1.00]
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = insertelement <2 x i64> undef, i64 %a1, i64 0
-  %2 = load i64, i64 *%a2
-  %3 = insertelement <2 x i64> undef, i64 %2, i64 0
-  %4 = add <2 x i64> %a0, %1
-  %5 = add <2 x i64> %a0, %3
-  %6 = extractelement <2 x i64> %4, i64 0
-  %7 = extractelement <2 x i64> %5, i64 0
-  store i64 %6, i64* %a2
-  ret i64 %7
-}
-
-define <2 x double> @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
-; GENERIC-LABEL: test_movhpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movhpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm2 # sched: [6:3.00]
-; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    movhpd %xmm2, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movhpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movhpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movhpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movhpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movhpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movhpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movhpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movhpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movhpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movhpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movhpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movhpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    movhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movhpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movhpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    movhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movhpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movhpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    movhpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movhpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = bitcast x86_mmx* %a2 to double*
-  %2 = load double, double *%1, align 8
-  %3 = insertelement <2 x double> %a1, double %2, i32 1
-  %4 = fadd <2 x double> %a0, %3
-  %5 = extractelement <2 x double> %4, i32 1
-  store double %5, double* %1
-  ret <2 x double> %3
-}
-
-define <2 x double> @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
-; GENERIC-LABEL: test_movlpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movlpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm2 # sched: [6:3.00]
-; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    movlpd %xmm2, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movlpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movlpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movlpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movlpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movlpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movlpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movlpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movlpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movlpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movlpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movlpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movlpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    movlpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movlpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movlpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    movlpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movlpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movlpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    movlpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movlpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = bitcast x86_mmx* %a2 to double*
-  %2 = load double, double *%1, align 8
-  %3 = insertelement <2 x double> %a1, double %2, i32 0
-  %4 = fadd <2 x double> %a0, %3
-  %5 = extractelement <2 x double> %4, i32 0
-  store double %5, double* %1
-  ret <2 x double> %3
-}
-
-define i32 @test_movmskpd(<2 x double> %a0) {
-; GENERIC-LABEL: test_movmskpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movmskpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movmskpd %xmm0, %eax # sched: [3:3.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movmskpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movmskpd %xmm0, %eax # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movmskpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movmskpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movmskpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movmskpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movmskpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movmskpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movmskpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movmskpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movmskpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movmskpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movmskpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movmskpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovmskpd %xmm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movmskpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movmskpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movmskpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movmskpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovmskpd %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
-  ret i32 %1
-}
-declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
-
-define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_movntdqa:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movntdqa:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddq %xmm0, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movntdqa:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movntdqa:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntdqa:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movntdqa:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movntdqa:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movntdqa:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntdqa:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movntdqa:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntdqa:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movntdqa:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntdqa:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movntdqa:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movntdqa:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movntdqa:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movntdqa:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movntdqa:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movntdqa:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = add <2 x i64> %a0, %a0
-  store <2 x i64> %1, <2 x i64> *%a1, align 16, !nontemporal !0
-  ret void
-}
-
-define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_movntpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movntpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    addpd %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movntpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movntpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movntpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movntpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movntpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movntpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movntpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movntpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [3:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movntpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movntpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movntpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movntpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movntpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fadd <2 x double> %a0, %a0
-  store <2 x double> %1, <2 x double> *%a1, align 16, !nontemporal !0
-  ret void
-}
-
-define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
-; GENERIC-LABEL: test_movq_mem:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movq_mem:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movq_mem:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00]
-; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movq_mem:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movq_mem:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movq_mem:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movq_mem:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movq_mem:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movq_mem:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movq_mem:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movq_mem:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movq_mem:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movq_mem:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movq_mem:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movq_mem:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vmovq %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movq_mem:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movq_mem:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovq %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movq_mem:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movq_mem:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64* %a1, align 1
-  %2 = insertelement <2 x i64> zeroinitializer, i64 %1, i32 0
-  %3 = add <2 x i64> %a0, %2
-  %4 = extractelement <2 x i64> %3, i32 0
-  store i64 %4, i64 *%a1, align 1
-  ret <2 x i64> %3
-}
-
-define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) {
-; GENERIC-LABEL: test_movq_reg:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movq_reg:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
-; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movq_reg:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
-; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movq_reg:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movq_reg:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SANDY-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movq_reg:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movq_reg:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; HASWELL-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movq_reg:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movq_reg:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; BROADWELL-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movq_reg:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movq_reg:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movq_reg:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movq_reg:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SKX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movq_reg:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movq_reg:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50]
-; BDVER2-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movq_reg:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movq_reg:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
-; BTVER2-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movq_reg:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movq_reg:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
-  %2 = add <2 x i64> %a1, %1
-  ret <2 x i64> %2
-}
-
-define void @test_movsd_mem(double* %a0, double* %a1) {
-; GENERIC-LABEL: test_movsd_mem:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movsd_mem:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [1:1.00]
-; ATOM-NEXT:    addsd %xmm0, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movsd_mem:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
-; SLM-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movsd_mem:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; SANDY-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movsd_mem:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movsd_mem:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; HASWELL-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movsd_mem:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; HASWELL-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movsd_mem:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movsd_mem:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BROADWELL-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movsd_mem:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movsd_mem:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movsd_mem:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movsd_mem:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movsd_mem:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BDVER2-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movsd_mem:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BDVER2-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vmovsd %xmm0, (%rsi) # sched: [2:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movsd_mem:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movsd_mem:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmovsd %xmm0, (%rsi) # sched: [2:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movsd_mem:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movsd_mem:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
-; ZNVER1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load double, double* %a0, align 1
-  %2 = fadd double %1, %1
-  store double %2, double *%a1, align 1
-  ret void
-}
-
-define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
-; GENERIC-LABEL: test_movsd_reg:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; GENERIC-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movsd_reg:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movsd_reg:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movsd_reg:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SANDY-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movsd_reg:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movsd_reg:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movsd_reg:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movsd_reg:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movsd_reg:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movsd_reg:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movsd_reg:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movsd_reg:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SKX-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movsd_reg:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movsd_reg:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movsd_reg:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movsd_reg:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movsd_reg:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movsd_reg:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movsd_reg:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0>
-  ret <2 x double> %1
-}
-
-define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_movupd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movupd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movupd (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    addpd %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    movupd %xmm0, (%rsi) # sched: [2:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movupd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movupd (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movupd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movupd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movupd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movupd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movupd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movupd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovupd (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movupd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movupd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movupd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movupd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movupd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movupd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovupd (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movupd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movupd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovupd (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movupd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movupd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovupd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load <2 x double>, <2 x double> *%a0, align 1
-  %2 = fadd <2 x double> %1, %1
-  store <2 x double> %2, <2 x double> *%a1, align 1
-  ret void
-}
-
-define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_mulpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mulpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    mulpd %xmm1, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mulpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:2.00]
-; SLM-NEXT:    mulpd (%rdi), %xmm0 # sched: [8:2.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mulpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mulpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mulpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mulpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mulpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [3:0.50]
-; BROADWELL-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [8:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BROADWELL-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mulpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mulpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mulpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mulpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mulpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mulpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:2.00]
-; BTVER2-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [9:2.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mulpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BTVER2-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mulpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [3:0.50]
-; ZNVER1-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mulpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; ZNVER1-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fmul <2 x double> %a0, %a1
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = fmul <2 x double> %1, %2
-  ret <2 x double> %3
-}
-
-define double @test_mulsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_mulsd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    mulsd (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mulsd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    mulsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mulsd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:2.00]
-; SLM-NEXT:    mulsd (%rdi), %xmm0 # sched: [8:2.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mulsd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mulsd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mulsd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [10:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mulsd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mulsd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [3:0.50]
-; BROADWELL-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [8:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulsd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BROADWELL-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mulsd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulsd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mulsd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mulsd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mulsd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mulsd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mulsd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:2.00]
-; BTVER2-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:2.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mulsd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BTVER2-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mulsd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [3:0.50]
-; ZNVER1-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [10:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mulsd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; ZNVER1-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fmul double %a0, %a1
-  %2 = load double, double *%a2, align 8
-  %3 = fmul double %1, %2
-  ret double %3
-}
-
-define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_orpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_orpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    orpd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_orpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    orpd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_orpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_orpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_orpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_orpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_orpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_orpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_orpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_orpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_orpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_orpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_orpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_orpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_orpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_orpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_orpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_orpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = bitcast <2 x double> %a0 to <4 x i32>
-  %2 = bitcast <2 x double> %a1 to <4 x i32>
-  %3 = or <4 x i32> %1, %2
-  %4 = load <2 x double>, <2 x double> *%a2, align 16
-  %5 = bitcast <2 x double> %4 to <4 x i32>
-  %6 = or <4 x i32> %3, %5
-  %7 = bitcast <4 x i32> %6 to <2 x double>
-  %8 = fadd <2 x double> %a1, %7
-  ret <2 x double> %8
-}
-
-define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_packssdw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packssdw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    packssdw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packssdw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    packssdw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_packssdw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_packssdw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_packssdw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_packssdw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_packssdw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packssdw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_packssdw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packssdw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_packssdw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packssdw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_packssdw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_packssdw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_packssdw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_packssdw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_packssdw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_packssdw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
-  %2 = bitcast <8 x i16> %1 to <4 x i32>
-  %3 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %4 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %2, <4 x i32> %3)
-  ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_packsswb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packsswb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    packsswb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packsswb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    packsswb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_packsswb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_packsswb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_packsswb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_packsswb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_packsswb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packsswb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_packsswb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packsswb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_packsswb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packsswb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_packsswb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_packsswb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_packsswb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_packsswb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_packsswb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_packsswb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = bitcast <16 x i8> %1 to <8 x i16>
-  %3 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %4 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %2, <8 x i16> %3)
-  ret <16 x i8> %4
-}
-declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_packuswb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packuswb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    packuswb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packuswb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    packuswb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_packuswb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_packuswb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_packuswb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_packuswb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_packuswb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packuswb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_packuswb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packuswb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_packuswb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packuswb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_packuswb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_packuswb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_packuswb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_packuswb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_packuswb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_packuswb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = bitcast <16 x i8> %1 to <8 x i16>
-  %3 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %4 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %2, <8 x i16> %3)
-  ret <16 x i8> %4
-}
-declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_paddb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    paddb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    paddb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = add <16 x i8> %a0, %a1
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = add <16 x i8> %1, %2
-  ret <16 x i8> %3
-}
-
-define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_paddd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    paddd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    paddd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = add <4 x i32> %a0, %a1
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %3 = add <4 x i32> %1, %2
-  ret <4 x i32> %3
-}
-
-define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_paddq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    paddq (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    paddq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = add <2 x i64> %a0, %a1
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = add <2 x i64> %1, %2
-  ret <2 x i64> %3
-}
-
-define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_paddsb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddsb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    paddsb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddsb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    paddsb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddsb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddsb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddsb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddsb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddsb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddsb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddsb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddsb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddsb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddsb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddsb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddsb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddsb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddsb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddsb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddsb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %1, <16 x i8> %2)
-  ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_paddsw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddsw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    paddsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddsw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    paddsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddsw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddsw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddsw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddsw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddsw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddsw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddsw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddsw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddsw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddsw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddsw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddsw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddsw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddsw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddsw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddsw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_paddusb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddusb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    paddusb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddusb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    paddusb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddusb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddusb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddusb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddusb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddusb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddusb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddusb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddusb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddusb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddusb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddusb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddusb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddusb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddusb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddusb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddusb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %1, <16 x i8> %2)
-  ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_paddusw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddusw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    paddusw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddusw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    paddusw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddusw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddusw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddusw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddusw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddusw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddusw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddusw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddusw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddusw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddusw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddusw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddusw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddusw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddusw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddusw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddusw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_paddw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    paddw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    paddw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = add <8 x i16> %a0, %a1
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = add <8 x i16> %1, %2
-  ret <8 x i16> %3
-}
-
-define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pand:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pand:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    pand (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pand:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    pand (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pand:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pand:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pand:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pand:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pand:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pand:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pand:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pand:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pand:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pand:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pand:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pand:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pand:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pand:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pand:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pand:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = and <2 x i64> %a0, %a1
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = and <2 x i64> %1, %2
-  %4 = add <2 x i64> %3, %a1
-  ret <2 x i64> %4
-}
-
-define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pandn:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
-; GENERIC-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pandn:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    pandn (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm0, %xmm1 # sched: [2:1.00]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pandn:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    pandn (%rdi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pandn:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pandn:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pandn:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pandn:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pandn:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pandn:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pandn:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pandn:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pandn:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
-; SKX-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pandn:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pandn:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pandn:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pandn:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pandn:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pandn:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pandn:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = xor <2 x i64> %a0, <i64 -1, i64 -1>
-  %2 = and <2 x i64> %a1, %1
-  %3 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
-  %5 = and <2 x i64> %3, %4
-  %6 = add <2 x i64> %2, %5
-  ret <2 x i64> %6
-}
-
-define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pavgb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pavgb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    pavgb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pavgb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    pavgb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pavgb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pavgb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pavgb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pavgb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pavgb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pavgb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pavgb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pavgb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pavgb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pavgb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pavgb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pavgb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pavgb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pavgb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pavgb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pavgb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = zext <16 x i8> %a0 to <16 x i16>
-  %2 = zext <16 x i8> %a1 to <16 x i16>
-  %3 = add <16 x i16> %1, %2
-  %4 = add <16 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  %5 = lshr <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  %6 = trunc <16 x i16> %5 to <16 x i8>
-  %7 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %8 = zext <16 x i8> %6 to <16 x i16>
-  %9 = zext <16 x i8> %7 to <16 x i16>
-  %10 = add <16 x i16> %8, %9
-  %11 = add <16 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  %12 = lshr <16 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  %13 = trunc <16 x i16> %12 to <16 x i8>
-  ret <16 x i8> %13
-}
-
-define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pavgw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pavgw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    pavgw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pavgw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    pavgw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pavgw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pavgw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pavgw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pavgw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pavgw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pavgw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pavgw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pavgw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pavgw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pavgw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pavgw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pavgw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pavgw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pavgw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pavgw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pavgw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = zext <8 x i16> %a0 to <8 x i32>
-  %2 = zext <8 x i16> %a1 to <8 x i32>
-  %3 = add <8 x i32> %1, %2
-  %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %5 = lshr <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %6 = trunc <8 x i32> %5 to <8 x i16>
-  %7 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %8 = zext <8 x i16> %6 to <8 x i32>
-  %9 = zext <8 x i16> %7 to <8 x i32>
-  %10 = add <8 x i32> %8, %9
-  %11 = add <8 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %12 = lshr <8 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %13 = trunc <8 x i32> %12 to <8 x i16>
-  ret <8 x i16> %13
-}
-
-define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpeqb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpeqb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpeqb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpeqb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpeqb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpeqb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpeqb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpeqb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpeqb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpeqb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpeqb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = icmp eq <16 x i8> %a0, %a1
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = icmp eq <16 x i8> %a0, %2
-  %4 = or <16 x i1> %1, %3
-  %5 = sext <16 x i1> %4 to <16 x i8>
-  ret <16 x i8> %5
-}
-
-define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pcmpeqd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpeqd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpeqd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpeqd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpeqd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpeqd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpeqd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpeqd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpeqd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpeqd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpeqd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = icmp eq <4 x i32> %a0, %a1
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %3 = icmp eq <4 x i32> %a0, %2
-  %4 = or <4 x i1> %1, %3
-  %5 = sext <4 x i1> %4 to <4 x i32>
-  ret <4 x i32> %5
-}
-
-define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pcmpeqw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpeqw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpeqw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpeqw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpeqw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpeqw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpeqw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpeqw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpeqw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpeqw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpeqw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = icmp eq <8 x i16> %a0, %a1
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = icmp eq <8 x i16> %a0, %2
-  %4 = or <8 x i1> %1, %3
-  %5 = sext <8 x i1> %4 to <8 x i16>
-  ret <8 x i16> %5
-}
-
-define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpgtb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpgtb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpgtb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpgtb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpgtb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpgtb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpgtb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpgtb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpgtb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpgtb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpgtb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = icmp sgt <16 x i8> %a0, %a1
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = icmp sgt <16 x i8> %a0, %2
-  %4 = or <16 x i1> %1, %3
-  %5 = sext <16 x i1> %4 to <16 x i8>
-  ret <16 x i8> %5
-}
-
-define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pcmpgtd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpgtd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpgtd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpgtd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpgtd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpgtd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpgtd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpgtd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpgtd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpgtd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpgtd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = icmp sgt <4 x i32> %a0, %a1
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %3 = icmp eq <4 x i32> %a0, %2
-  %4 = or <4 x i1> %1, %3
-  %5 = sext <4 x i1> %4 to <4 x i32>
-  ret <4 x i32> %5
-}
-
-define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pcmpgtw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpgtw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpgtw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpgtw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpgtw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpgtw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpgtw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpgtw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpgtw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpgtw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpgtw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = icmp sgt <8 x i16> %a0, %a1
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = icmp sgt <8 x i16> %a0, %2
-  %4 = or <8 x i1> %1, %3
-  %5 = sext <8 x i1> %4 to <8 x i16>
-  ret <8 x i16> %5
-}
-
-define i16 @test_pextrw(<8 x i16> %a0) {
-; GENERIC-LABEL: test_pextrw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pextrw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pextrw $6, %xmm0, %eax # sched: [4:2.00]
-; ATOM-NEXT:    # kill: def $ax killed $ax killed $eax
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pextrw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pextrw $6, %xmm0, %eax # sched: [1:1.00]
-; SLM-NEXT:    # kill: def $ax killed $ax killed $eax
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pextrw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pextrw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SANDY-NEXT:    # kill: def $ax killed $ax killed $eax
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pextrw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pextrw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pextrw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pextrw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pextrw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pextrw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pextrw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pextrw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pextrw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pextrw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpextrw $6, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT:    # kill: def $ax killed $ax killed $eax
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pextrw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pextrw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pextrw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pextrw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = extractelement <8 x i16> %a0, i32 6
-  ret i16 %1
-}
-
-define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) {
-; GENERIC-LABEL: test_pinsrw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pinsrw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pinsrw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pinsrw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pinsrw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pinsrw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
-; HASWELL-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pinsrw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pinsrw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
-; BROADWELL-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pinsrw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; BROADWELL-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pinsrw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
-; SKYLAKE-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pinsrw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKYLAKE-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pinsrw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
-; SKX-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pinsrw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKX-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pinsrw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [12:0.50]
-; BDVER2-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pinsrw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [12:0.50]
-; BDVER2-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pinsrw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [7:0.50]
-; BTVER2-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pinsrw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
-; BTVER2-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pinsrw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pinsrw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = insertelement <8 x i16> %a0, i16 %a1, i32 1
-  %2 = load i16, i16 *%a2
-  %3 = insertelement <8 x i16> %1, i16 %2, i32 3
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmaddwd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaddwd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaddwd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaddwd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaddwd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaddwd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaddwd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaddwd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaddwd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaddwd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaddwd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaddwd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaddwd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaddwd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaddwd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaddwd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaddwd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaddwd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaddwd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = bitcast <4 x i32> %1 to <8 x i16>
-  %3 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %4 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %3)
-  ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmaxsw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaxsw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaxsw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaxsw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaxsw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaxsw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaxsw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaxsw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxsw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaxsw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxsw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaxsw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxsw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaxsw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaxsw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaxsw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaxsw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaxsw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaxsw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %1, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pmaxub:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaxub:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    pmaxub (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaxub:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    pmaxub (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaxub:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaxub:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaxub:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaxub:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaxub:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxub:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaxub:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxub:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaxub:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxub:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaxub:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaxub:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaxub:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaxub:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaxub:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaxub:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %1, <16 x i8> %2)
-  ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pminsw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pminsw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    pminsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pminsw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    pminsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pminsw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pminsw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pminsw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pminsw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pminsw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminsw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pminsw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminsw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pminsw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminsw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pminsw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pminsw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pminsw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pminsw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pminsw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pminsw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %1, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pminub:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pminub:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    pminub (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pminub:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    pminub (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pminub:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pminub:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pminub:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pminub:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pminub:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminub:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pminub:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminub:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pminub:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminub:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pminub:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pminub:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pminub:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pminub:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pminub:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pminub:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %1, <16 x i8> %2)
-  ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-define i32 @test_pmovmskb(<16 x i8> %a0) {
-; GENERIC-LABEL: test_pmovmskb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmovmskb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmovmskb %xmm0, %eax # sched: [3:3.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmovmskb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pmovmskb %xmm0, %eax # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovmskb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovmskb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovmskb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovmskb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovmskb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovmskb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovmskb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovmskb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovmskb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovmskb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovmskb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [13:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovmskb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpmovmskb %xmm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovmskb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovmskb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovmskb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovmskb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpmovmskb %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
-  ret i32 %1
-}
-declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
-
-define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmulhuw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmulhuw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmulhuw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmulhuw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmulhuw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmulhuw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmulhuw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmulhuw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhuw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmulhuw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhuw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmulhuw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhuw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmulhuw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmulhuw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmulhuw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmulhuw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmulhuw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmulhuw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmulhw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmulhw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    pmulhw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmulhw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pmulhw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmulhw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmulhw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmulhw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmulhw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmulhw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmulhw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmulhw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmulhw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmulhw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmulhw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmulhw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmulhw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmulhw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %1, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmullw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmullw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    pmullw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmullw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pmullw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmullw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmullw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmullw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmullw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmullw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmullw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmullw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmullw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmullw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmullw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmullw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmullw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmullw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmullw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmullw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmullw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = mul <8 x i16> %a0, %a1
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = mul <8 x i16> %1, %2
-  ret <8 x i16> %3
-}
-
-define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pmuludq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmuludq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    pmuludq (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmuludq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pmuludq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmuludq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmuludq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmuludq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmuludq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmuludq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmuludq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmuludq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmuludq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmuludq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmuludq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmuludq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmuludq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmuludq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmuludq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmuludq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmuludq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
-  %2 = bitcast <2 x i64> %1 to <4 x i32>
-  %3 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %4 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %2, <4 x i32> %3)
-  ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_por:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_por:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    por (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_por:
-; SLM:       # %bb.0:
-; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    por (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_por:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_por:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_por:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_por:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_por:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    por (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_por:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_por:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_por:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_por:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_por:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_por:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_por:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_por:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    por (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_por:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_por:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    por (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_por:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = or <2 x i64> %a0, %a1
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = or <2 x i64> %1, %2
-  %4 = add <2 x i64> %3, %a1
-  ret <2 x i64> %4
-}
-
-define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psadbw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    psadbw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psadbw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    psadbw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psadbw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psadbw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    psadbw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psadbw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psadbw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psadbw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psadbw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psadbw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psadbw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psadbw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psadbw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SKYLAKE-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psadbw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
-; SKX-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psadbw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SKX-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psadbw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [4:0.50]
-; BDVER2-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [9:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psadbw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; BDVER2-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psadbw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psadbw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psadbw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psadbw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
-  %2 = bitcast <2 x i64> %1 to <16 x i8>
-  %3 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %4 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %2, <16 x i8> %3)
-  ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_pshufd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
-; GENERIC-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshufd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; ATOM-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00]
-; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshufd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [4:1.00]
-; SLM-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; SLM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pshufd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
-; SANDY-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pshufd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
-; SANDY-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pshufd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pshufd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; HASWELL-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pshufd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; BROADWELL-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pshufd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pshufd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; SKX-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
-; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; SKX-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pshufd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pshufd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
-; BDVER2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [2:0.50]
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pshufd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pshufd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
-; BTVER2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pshufd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pshufd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
-; ZNVER1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-  %2 = load <4 x i32>, <4 x i32> *%a1, align 16
-  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  %4 = add <4 x i32> %1, %3
-  ret <4 x i32> %4
-}
-
-define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pshufhw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; GENERIC-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshufhw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; ATOM-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
-; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshufhw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [4:1.00]
-; SLM-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SLM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pshufhw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; SANDY-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pshufhw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; SANDY-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pshufhw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pshufhw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; HASWELL-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pshufhw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufhw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; BROADWELL-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
-; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pshufhw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufhw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pshufhw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SKX-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufhw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SKX-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pshufhw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pshufhw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; BDVER2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50]
-; BDVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pshufhw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pshufhw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
-; BTVER2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pshufhw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pshufhw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
-; ZNVER1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6>
-  %2 = load <8 x i16>, <8 x i16> *%a1, align 16
-  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
-  %4 = add <8 x i16> %1, %3
-  ret <8 x i16> %4
-}
-
-define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pshuflw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshuflw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshuflw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [4:1.00]
-; SLM-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SLM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pshuflw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; SANDY-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pshuflw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; SANDY-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pshuflw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pshuflw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; HASWELL-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pshuflw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshuflw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; BROADWELL-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
-; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pshuflw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshuflw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pshuflw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SKX-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshuflw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pshuflw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pshuflw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; BDVER2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50]
-; BDVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pshuflw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pshuflw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
-; BTVER2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pshuflw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pshuflw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
-; ZNVER1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
-; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
-  %2 = load <8 x i16>, <8 x i16> *%a1, align 16
-  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
-  %4 = add <8 x i16> %1, %3
-  ret <8 x i16> %4
-}
-
-define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pslld:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pslld:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    pslld (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pslld:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pslld %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    pslld (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pslld:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pslld:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pslld:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pslld:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pslld:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pslld:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pslld:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pslld:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pslld:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pslld:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pslld:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT:    pslld $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pslld:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pslld:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    pslld $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pslld:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pslld:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pslld:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %3 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %1, <4 x i32> %2)
-  %4 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %3, i32 2)
-  ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
-
-define <4 x i32> @test_pslldq(<4 x i32> %a0) {
-; GENERIC-LABEL: test_pslldq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pslldq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pslldq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pslldq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pslldq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pslldq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pslldq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pslldq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pslldq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pslldq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pslldq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pslldq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pslldq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pslldq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pslldq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pslldq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pslldq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pslldq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pslldq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
-  ret <4 x i32> %1
-}
-
-define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psllq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psllq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    psllq (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psllq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psllq %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    psllq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psllq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psllq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psllq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psllq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psllq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psllq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psllq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psllq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT:    psllq $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psllq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psllq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    psllq $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psllq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psllq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psllq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %1, <2 x i64> %2)
-  %4 = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %3, i32 2)
-  ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
-
-define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psllw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psllw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    psllw (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psllw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psllw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    psllw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psllw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psllw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psllw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psllw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psllw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psllw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psllw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psllw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT:    psllw $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psllw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psllw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    psllw $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psllw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psllw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psllw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %1, <8 x i16> %2)
-  %4 = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %3, i32 2)
-  ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
-
-define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psrad:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrad:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    psrad (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrad:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psrad %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    psrad (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrad:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrad:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrad:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrad:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrad:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrad:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrad:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrad:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrad:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrad:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrad:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT:    psrad $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrad:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrad:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    psrad $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrad:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrad:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrad:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %3 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> %2)
-  %4 = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
-  ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
-
-define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psraw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psraw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    psraw (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psraw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psraw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    psraw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psraw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psraw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psraw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psraw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psraw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psraw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psraw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psraw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psraw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psraw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psraw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT:    psraw $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psraw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psraw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    psraw $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psraw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psraw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psraw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> %2)
-  %4 = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
-  ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
-
-define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psrld:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrld:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    psrld (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrld:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psrld %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    psrld (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrld:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrld:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrld:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrld:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrld:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrld:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrld:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrld:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrld:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrld:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrld:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT:    psrld $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrld:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrld:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    psrld $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrld:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrld:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrld:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %3 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %1, <4 x i32> %2)
-  %4 = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %3, i32 2)
-  ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
-
-define <4 x i32> @test_psrldq(<4 x i32> %a0) {
-; GENERIC-LABEL: test_psrldq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrldq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrldq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrldq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrldq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrldq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrldq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrldq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrldq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrldq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrldq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrldq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrldq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrldq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrldq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrldq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrldq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrldq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrldq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-  ret <4 x i32> %1
-}
-
-define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psrlq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrlq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    psrlq (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrlq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psrlq %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    psrlq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrlq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrlq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrlq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrlq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrlq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrlq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrlq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrlq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT:    psrlq $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrlq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrlq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    psrlq $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrlq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrlq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrlq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %1, <2 x i64> %2)
-  %4 = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %3, i32 2)
-  ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
-
-define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psrlw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrlw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    psrlw (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrlw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psrlw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    psrlw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrlw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrlw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrlw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrlw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrlw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrlw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrlw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrlw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT:    psrlw $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrlw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrlw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT:    psrlw $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrlw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrlw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrlw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %1, <8 x i16> %2)
-  %4 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %3, i32 2)
-  ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
-
-define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psubb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    psubb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    psubb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = sub <16 x i8> %a0, %a1
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = sub <16 x i8> %1, %2
-  ret <16 x i8> %3
-}
-
-define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psubd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    psubd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    psubd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = sub <4 x i32> %a0, %a1
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %3 = sub <4 x i32> %1, %2
-  ret <4 x i32> %3
-}
-
-define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psubq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psubq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    psubq (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    psubq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = sub <2 x i64> %a0, %a1
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = sub <2 x i64> %1, %2
-  ret <2 x i64> %3
-}
-
-define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psubsb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubsb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    psubsb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubsb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    psubsb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubsb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubsb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubsb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubsb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubsb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubsb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubsb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubsb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubsb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubsb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubsb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubsb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubsb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubsb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubsb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubsb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %1, <16 x i8> %2)
-  ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psubsw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubsw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    psubsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubsw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    psubsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubsw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubsw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubsw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubsw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubsw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubsw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubsw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubsw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubsw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubsw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubsw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubsw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubsw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubsw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubsw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubsw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psubusb:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubusb:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    psubusb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubusb:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    psubusb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubusb:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubusb:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubusb:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubusb:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubusb:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubusb:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubusb:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubusb:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubusb:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubusb:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubusb:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubusb:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubusb:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubusb:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubusb:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubusb:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %1, <16 x i8> %2)
-  ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psubusw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubusw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    psubusw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubusw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    psubusw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubusw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubusw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubusw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubusw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubusw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubusw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubusw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubusw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubusw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubusw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubusw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubusw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubusw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubusw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubusw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubusw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psubw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    psubw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    psubw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = sub <8 x i16> %a0, %a1
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = sub <8 x i16> %1, %2
-  ret <8 x i16> %3
-}
-
-define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_punpckhbw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; GENERIC-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhbw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; ATOM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhbw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SLM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckhbw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; SANDY-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckhbw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckhbw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckhbw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckhbw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhbw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; BROADWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckhbw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhbw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckhbw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SKX-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhbw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SKX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckhbw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckhbw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50]
-; BDVER2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckhbw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckhbw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; BTVER2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckhbw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckhbw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
-; ZNVER1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
-  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-  ret <16 x i8> %3
-}
-
-define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_punpckhdq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhdq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; ATOM-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhdq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SLM-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00]
-; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckhdq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckhdq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckhdq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckhdq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckhdq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhdq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckhdq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhdq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckhdq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhdq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckhdq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckhdq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckhdq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckhdq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckhdq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckhdq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
-; ZNVER1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
-  %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-  %4 = add <4 x i32> %1, %3
-  ret <4 x i32> %4
-}
-
-define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_punpckhqdq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; GENERIC-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhqdq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhqdq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SLM-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00]
-; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckhqdq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckhqdq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckhqdq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckhqdq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckhqdq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhqdq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckhqdq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhqdq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckhqdq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhqdq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckhqdq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckhqdq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckhqdq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckhqdq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckhqdq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckhqdq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
-; ZNVER1-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2x i32> <i32 1, i32 3>
-  %4 = add <2 x i64> %1, %3
-  ret <2 x i64> %4
-}
-
-define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; test input: two chained high-half i16 interleaves, the second taking one source from memory
-; GENERIC-LABEL: test_punpckhwd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; GENERIC-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhwd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; ATOM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhwd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SLM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckhwd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckhwd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckhwd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckhwd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckhwd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhwd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; BROADWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckhwd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhwd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckhwd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKX-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhwd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckhwd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckhwd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50]
-; BDVER2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckhwd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckhwd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; BTVER2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckhwd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckhwd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
-; ZNVER1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleave elements 4-7 of %a0 with elements 4-7 of %a1 (mask indices 12-15 select from the second operand)
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16 ; second interleave source is read from memory through %a2, 16-byte aligned
-  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; same mask on %1 and the loaded vector; the assertions above expect the mem-operand form for this one
-  ret <8 x i16> %3 ; result depends on both shuffles because %3 consumes %1
-}
-
-define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; test input: two chained low-half i8 interleaves, the second taking one source from memory
-; GENERIC-LABEL: test_punpcklbw:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; GENERIC-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpcklbw:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; ATOM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpcklbw:
-; SLM:       # %bb.0:
-; SLM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SLM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpcklbw:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpcklbw:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpcklbw:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpcklbw:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpcklbw:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklbw:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; BROADWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpcklbw:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklbw:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpcklbw:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKX-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklbw:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpcklbw:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpcklbw:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50]
-; BDVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpcklbw:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpcklbw:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpcklbw:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpcklbw:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
-; ZNVER1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> ; interleave elements 0-7 of %a0 with elements 0-7 of %a1 (mask indices 16-23 select from the second operand)
-  %2 = load <16 x i8>, <16 x i8> *%a2, align 16 ; second interleave source is read from memory through %a2, 16-byte aligned
-  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> ; same mask on %1 and the loaded vector; the assertions above expect the mem-operand form for this one
-  ret <16 x i8> %3 ; result depends on both shuffles because %3 consumes %1
-}
-
-define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; test input: two independent low-half i32 interleaves (register and memory source) combined by an add
-; GENERIC-LABEL: test_punpckldq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; GENERIC-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckldq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckldq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SLM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00]
-; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckldq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckldq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckldq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckldq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckldq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckldq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckldq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckldq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckldq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckldq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckldq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckldq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckldq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckldq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckldq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckldq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
-; ZNVER1-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave elements 0-1 of %a0 with elements 0-1 of %a1 (mask indices 4-5 select from the second operand)
-  %2 = load <4 x i32>, <4 x i32> *%a2, align 16 ; second interleave source is read from memory through %a2, 16-byte aligned
-  %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; same mask on %a1 and the loaded vector; the assertions above expect the mem-operand form for this one
-  %4 = add <4 x i32> %1, %3 ; combines both shuffle results, so the returned value uses each of them
-  ret <4 x i32> %4
-}
-
-define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_punpcklqdq:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpcklqdq:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; ATOM-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpcklqdq:
-; SLM:       # %bb.0:
-; SLM-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SLM-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpcklqdq:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; SANDY-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpcklqdq:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; SANDY-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpcklqdq:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpcklqdq:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpcklqdq:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklqdq:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpcklqdq:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklqdq:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpcklqdq:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklqdq:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpcklqdq:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpcklqdq:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50]
-; BDVER2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpcklqdq:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpcklqdq:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; BTVER2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpcklqdq:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpcklqdq:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
-; ZNVER1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2x i32> <i32 0, i32 2>
-  %4 = add <2 x i64> %1, %3
-  ret <2 x i64> %4
-}
-
-define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_punpcklwd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpcklwd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; ATOM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpcklwd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SLM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpcklwd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpcklwd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpcklwd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpcklwd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpcklwd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklwd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpcklwd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklwd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpcklwd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklwd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpcklwd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpcklwd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpcklwd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpcklwd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpcklwd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpcklwd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
-; ZNVER1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
-  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-  ret <8 x i16> %3
-}
-
-define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pxor:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pxor:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    pxor (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pxor:
-; SLM:       # %bb.0:
-; SLM-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    pxor (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pxor:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pxor:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pxor:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pxor:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pxor:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pxor:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pxor:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pxor:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pxor:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pxor:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pxor:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pxor:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pxor:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pxor:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pxor:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pxor:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = xor <2 x i64> %a0, %a1
-  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
-  %3 = xor <2 x i64> %1, %2
-  %4 = add <2 x i64> %3, %a1
-  ret <2 x i64> %4
-}
-
-define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_shufpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_shufpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; ATOM-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_shufpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SLM-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [4:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_shufpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SANDY-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_shufpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SANDY-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_shufpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_shufpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_shufpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shufpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; BROADWELL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_shufpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shufpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SKYLAKE-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_shufpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SKX-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_shufpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SKX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_shufpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_shufpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50]
-; BDVER2-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_shufpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_shufpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
-; BTVER2-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_shufpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_shufpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
-; ZNVER1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 2>
-  %4 = fadd <2 x double> %1, %3
-  ret <2 x double> %4
-}
-
-define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_sqrtpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
-; GENERIC-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:21.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sqrtpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [125:62.50]
-; ATOM-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [125:62.50]
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sqrtpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    sqrtpd (%rdi), %xmm1 # sched: [74:70.00]
-; SLM-NEXT:    sqrtpd %xmm0, %xmm0 # sched: [71:70.00]
-; SLM-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_sqrtpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
-; SANDY-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:21.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sqrtpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [21:21.00]
-; SANDY-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [27:21.00]
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_sqrtpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [16:14.00]
-; HASWELL-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [22:14.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_sqrtpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [16:14.00]
-; HASWELL-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [22:14.00]
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_sqrtpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [16:14.00]
-; BROADWELL-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [21:14.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sqrtpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [16:14.00]
-; BROADWELL-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [21:14.00]
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_sqrtpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [18:6.00]
-; SKYLAKE-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [24:6.00]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sqrtpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [18:6.00]
-; SKYLAKE-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [24:6.00]
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_sqrtpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [18:6.00]
-; SKX-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [24:6.00]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sqrtpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [18:6.00]
-; SKX-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [24:6.00]
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_sqrtpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [9:13.50]
-; BDVER2-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [14:13.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_sqrtpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [14:13.50]
-; BDVER2-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [9:13.50]
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_sqrtpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [27:27.00]
-; BTVER2-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [32:27.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_sqrtpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [32:27.00]
-; BTVER2-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [27:27.00]
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_sqrtpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:20.00]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_sqrtpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [27:20.00]
-; ZNVER1-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
-  %2 = load <2 x double>, <2 x double> *%a1, align 16
-  %3 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %2)
-  %4 = fadd <2 x double> %1, %3
-  ret <2 x double> %4
-}
-declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
-
-; TODO - sqrtsd_m
-
-define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_sqrtsd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [21:21.00]
-; GENERIC-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [21:21.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sqrtsd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movapd (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [62:31.00]
-; ATOM-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [62:31.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sqrtsd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movapd (%rdi), %xmm1 # sched: [3:1.00]
-; SLM-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [35:35.00]
-; SLM-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [35:35.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_sqrtsd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [21:21.00]
-; SANDY-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [21:21.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sqrtsd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
-; SANDY-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00]
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_sqrtsd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [16:14.00]
-; HASWELL-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [16:14.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_sqrtsd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:14.00]
-; HASWELL-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:14.00]
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_sqrtsd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [16:8.00]
-; BROADWELL-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [16:8.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sqrtsd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:8.00]
-; BROADWELL-NEXT:    vmovapd (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:8.00]
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_sqrtsd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [18:6.00]
-; SKYLAKE-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [18:6.00]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sqrtsd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
-; SKYLAKE-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00]
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_sqrtsd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [18:6.00]
-; SKX-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [18:6.00]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sqrtsd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
-; SKX-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00]
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_sqrtsd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [5:0.50]
-; BDVER2-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [9:13.50]
-; BDVER2-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [9:13.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_sqrtsd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovapd (%rdi), %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [9:13.50]
-; BDVER2-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [9:13.50]
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_sqrtsd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [5:1.00]
-; BTVER2-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [27:27.00]
-; BTVER2-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [27:27.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_sqrtsd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovapd (%rdi), %xmm1 # sched: [5:1.00]
-; BTVER2-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [27:27.00]
-; BTVER2-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [27:27.00]
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_sqrtsd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_sqrtsd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovapd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
-  %2 = load <2 x double>, <2 x double> *%a1, align 16
-  %3 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
-  %4 = fadd <2 x double> %1, %3
-  ret <2 x double> %4
-}
-declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
-
-define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_subpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_subpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    subpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    subpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_subpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    subpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_subpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_subpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_subpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_subpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_subpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_subpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_subpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_subpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_subpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_subpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_subpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_subpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_subpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_subpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_subpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_subpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fsub <2 x double> %a0, %a1
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = fsub <2 x double> %1, %2
-  ret <2 x double> %3
-}
-
-define double @test_subsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_subsd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    subsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_subsd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    subsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    subsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_subsd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    subsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_subsd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_subsd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_subsd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_subsd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_subsd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_subsd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_subsd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_subsd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_subsd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_subsd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_subsd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_subsd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_subsd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_subsd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_subsd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_subsd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = fsub double %a0, %a1
-  %2 = load double, double *%a2, align 8
-  %3 = fsub double %1, %2
-  ret double %3
-}
-
-define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_ucomisd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
-; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
-; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
-; GENERIC-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
-; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
-; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
-; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
-; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_ucomisd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    ucomisd %xmm1, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT:    setnp %al # sched: [1:0.50]
-; ATOM-NEXT:    sete %cl # sched: [1:0.50]
-; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
-; ATOM-NEXT:    ucomisd (%rdi), %xmm0 # sched: [10:5.00]
-; ATOM-NEXT:    setnp %al # sched: [1:0.50]
-; ATOM-NEXT:    sete %dl # sched: [1:0.50]
-; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
-; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_ucomisd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    setnp %al # sched: [1:0.50]
-; SLM-NEXT:    sete %cl # sched: [1:0.50]
-; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
-; SLM-NEXT:    ucomisd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    setnp %al # sched: [1:0.50]
-; SLM-NEXT:    sete %dl # sched: [1:0.50]
-; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
-; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_ucomisd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
-; SANDY-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_ucomisd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.50]
-; SANDY-NEXT:    sete %cl # sched: [1:0.50]
-; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
-; SANDY-NEXT:    vucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.50]
-; SANDY-NEXT:    sete %dl # sched: [1:0.50]
-; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
-; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
-; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_ucomisd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_ucomisd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
-; HASWELL-NEXT:    sete %cl # sched: [1:0.50]
-; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
-; HASWELL-NEXT:    vucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
-; HASWELL-NEXT:    sete %dl # sched: [1:0.50]
-; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
-; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_ucomisd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ucomisd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
-; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT:    vucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
-; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
-; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_ucomisd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ucomisd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT:    sete %cl # sched: [1:0.50]
-; SKYLAKE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-NEXT:    vucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT:    sete %dl # sched: [1:0.50]
-; SKYLAKE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_ucomisd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; SKX-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_ucomisd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT:    setnp %al # sched: [1:0.50]
-; SKX-NEXT:    sete %cl # sched: [1:0.50]
-; SKX-NEXT:    andb %al, %cl # sched: [1:0.25]
-; SKX-NEXT:    vucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    setnp %al # sched: [1:0.50]
-; SKX-NEXT:    sete %dl # sched: [1:0.50]
-; SKX-NEXT:    andb %al, %dl # sched: [1:0.25]
-; SKX-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_ucomisd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_ucomisd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vucomisd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT:    setnp %al # sched: [1:0.50]
-; BDVER2-NEXT:    sete %cl # sched: [1:0.50]
-; BDVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
-; BDVER2-NEXT:    vucomisd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT:    setnp %al # sched: [1:0.50]
-; BDVER2-NEXT:    sete %dl # sched: [1:0.50]
-; BDVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
-; BDVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; BDVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_ucomisd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_ucomisd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
-; BTVER2-NEXT:    sete %cl # sched: [1:0.50]
-; BTVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
-; BTVER2-NEXT:    vucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
-; BTVER2-NEXT:    sete %dl # sched: [1:0.50]
-; BTVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
-; BTVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
-; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_ucomisd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_ucomisd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT:    sete %cl # sched: [1:0.25]
-; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
-; ZNVER1-NEXT:    vucomisd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT:    sete %dl # sched: [1:0.25]
-; ZNVER1-NEXT:    andb %al, %dl # sched: [1:0.25]
-; ZNVER1-NEXT:    orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 8
-  %3 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %2)
-  %4 = or i32 %1, %3
-  ret i32 %4
-}
-declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_unpckhpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_unpckhpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_unpckhpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SLM-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_unpckhpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SANDY-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpckhpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_unpckhpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_unpckhpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_unpckhpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpckhpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_unpckhpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpckhpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_unpckhpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpckhpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_unpckhpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_unpckhpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_unpckhpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_unpckhpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_unpckhpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_unpckhpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; ZNVER1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 3>
-  %4 = fadd <2 x double> %1, %3
-  ret <2 x double> %4
-}
-
-define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_unpcklpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_unpcklpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm2, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_unpcklpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [4:1.00]
-; SLM-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; SLM-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_unpcklpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; SANDY-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpcklpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_unpcklpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_unpcklpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_unpcklpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpcklpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_unpcklpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpcklpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_unpcklpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; SKX-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpcklpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_unpcklpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_unpcklpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [2:0.50]
-; BDVER2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_unpcklpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_unpcklpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50]
-; BTVER2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_unpcklpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_unpcklpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50]
-; ZNVER1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> <i32 0, i32 2>
-  %4 = fadd <2 x double> %1, %3
-  ret <2 x double> %4
-}
-
-define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_xorpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xorpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    xorpd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xorpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    xorpd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_xorpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_xorpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_xorpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_xorpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_xorpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xorpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_xorpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xorpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_xorpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xorpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_xorpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_xorpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_xorpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_xorpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_xorpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_xorpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = bitcast <2 x double> %a0 to <4 x i32>
-  %2 = bitcast <2 x double> %a1 to <4 x i32>
-  %3 = xor <4 x i32> %1, %2
-  %4 = load <2 x double>, <2 x double> *%a2, align 16
-  %5 = bitcast <2 x double> %4 to <4 x i32>
-  %6 = xor <4 x i32> %3, %5
-  %7 = bitcast <4 x i32> %6 to <2 x double>
-  %8 = fadd <2 x double> %a1, %7
-  ret <2 x double> %8
-}
-
-!0 = !{i32 1}

Removed: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=353042&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (removed)
@@ -1,1549 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_addsubpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addsubpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addsubpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addsubpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addsubpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addsubpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addsubpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addsubpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addsubpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addsubpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addsubpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addsubpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addsubpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addsubpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addsubpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addsubpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addsubpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addsubpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addsubpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
-  ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_addsubps:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addsubps:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    addsubps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    addsubps (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addsubps:
-; SLM:       # %bb.0:
-; SLM-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    addsubps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addsubps:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addsubps:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addsubps:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addsubps:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addsubps:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addsubps:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addsubps:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addsubps:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addsubps:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addsubps:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addsubps:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addsubps:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addsubps:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addsubps:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addsubps:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addsubps:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
-  %2 = load <4 x float>, <4 x float> *%a2, align 16
-  %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
-  ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_haddpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_haddpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    haddpd %xmm1, %xmm0 # sched: [8:4.00]
-; ATOM-NEXT:    haddpd (%rdi), %xmm0 # sched: [9:4.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_haddpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    haddpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_haddpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
-; SANDY-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_haddpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_haddpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
-; HASWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_haddpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_haddpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_haddpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_haddpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_haddpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_haddpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
-; SKX-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_haddpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_haddpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [16:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_haddpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_haddpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_haddpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_haddpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_haddpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
-  ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_haddps:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_haddps:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    haddps %xmm1, %xmm0 # sched: [8:4.00]
-; ATOM-NEXT:    haddps (%rdi), %xmm0 # sched: [9:4.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_haddps:
-; SLM:       # %bb.0:
-; SLM-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    haddps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_haddps:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
-; SANDY-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_haddps:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_haddps:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
-; HASWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_haddps:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_haddps:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_haddps:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_haddps:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_haddps:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_haddps:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
-; SKX-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_haddps:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_haddps:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [16:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_haddps:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_haddps:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_haddps:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_haddps:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_haddps:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
-  %2 = load <4 x float>, <4 x float> *%a2, align 16
-  %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
-  ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_hsubpd:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_hsubpd:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [8:4.00]
-; ATOM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [9:4.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_hsubpd:
-; SLM:       # %bb.0:
-; SLM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_hsubpd:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
-; SANDY-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_hsubpd:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_hsubpd:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
-; HASWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_hsubpd:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_hsubpd:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_hsubpd:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_hsubpd:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_hsubpd:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_hsubpd:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
-; SKX-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_hsubpd:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_hsubpd:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [16:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_hsubpd:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_hsubpd:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_hsubpd:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_hsubpd:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_hsubpd:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
-  %2 = load <2 x double>, <2 x double> *%a2, align 16
-  %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
-  ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_hsubps:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_hsubps:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    hsubps %xmm1, %xmm0 # sched: [8:4.00]
-; ATOM-NEXT:    hsubps (%rdi), %xmm0 # sched: [9:4.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_hsubps:
-; SLM:       # %bb.0:
-; SLM-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    hsubps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_hsubps:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
-; SANDY-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_hsubps:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_hsubps:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
-; HASWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_hsubps:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_hsubps:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_hsubps:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_hsubps:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_hsubps:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_hsubps:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
-; SKX-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_hsubps:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_hsubps:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [16:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_hsubps:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_hsubps:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_hsubps:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_hsubps:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_hsubps:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
-  %2 = load <4 x float>, <4 x float> *%a2, align 16
-  %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
-  ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define <16 x i8> @test_lddqu(i8* %a0) {
-; GENERIC-LABEL: test_lddqu:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lddqu:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lddqu:
-; SLM:       # %bb.0:
-; SLM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_lddqu:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_lddqu:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_lddqu:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_lddqu:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_lddqu:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lddqu:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_lddqu:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lddqu:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_lddqu:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_lddqu:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_lddqu:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_lddqu:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_lddqu:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_lddqu:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_lddqu:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_lddqu:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vlddqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
-  ret <16 x i8> %1
-}
-declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
-
-define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
-; GENERIC-LABEL: test_monitor:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; GENERIC-NEXT:    monitor # sched: [100:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_monitor:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
-; ATOM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
-; ATOM-NEXT:    monitor # sched: [45:22.50]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_monitor:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
-; SLM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
-; SLM-NEXT:    monitor # sched: [100:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_monitor:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.33]
-; SANDY-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; SANDY-SSE-NEXT:    monitor # sched: [100:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_monitor:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    movl %esi, %ecx # sched: [1:0.33]
-; SANDY-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; SANDY-NEXT:    monitor # sched: [100:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_monitor:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; HASWELL-SSE-NEXT:    monitor # sched: [100:0.25]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_monitor:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; HASWELL-NEXT:    monitor # sched: [100:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_monitor:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; BROADWELL-SSE-NEXT:    monitor # sched: [100:0.25]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_monitor:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; BROADWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; BROADWELL-NEXT:    monitor # sched: [100:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_monitor:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT:    monitor # sched: [100:0.25]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_monitor:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; SKYLAKE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT:    monitor # sched: [100:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_monitor:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; SKX-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; SKX-SSE-NEXT:    monitor # sched: [100:0.25]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_monitor:
-; SKX:       # %bb.0:
-; SKX-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; SKX-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; SKX-NEXT:    monitor # sched: [100:0.25]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_monitor:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    monitor # sched: [100:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_monitor:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; BDVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
-; BDVER2-NEXT:    monitor # sched: [100:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_monitor:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    monitor # sched: [100:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_monitor:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
-; BTVER2-NEXT:    monitor # sched: [100:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_monitor:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    monitor # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_monitor:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    movl %esi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    monitor # sched: [100:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
-  ret void
-}
-declare void @llvm.x86.sse3.monitor(i8*, i32, i32)
-
-define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_movddup:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; GENERIC-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
-; GENERIC-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movddup:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; ATOM-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
-; ATOM-NEXT:    subpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movddup:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
-; SLM-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SLM-NEXT:    subpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movddup:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
-; SANDY-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movddup:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SANDY-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
-; SANDY-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movddup:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; HASWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movddup:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; HASWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; HASWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movddup:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movddup:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; BROADWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movddup:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; SKYLAKE-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movddup:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; SKYLAKE-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movddup:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; SKX-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; SKX-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movddup:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SKX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; SKX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movddup:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movddup:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [7:0.50]
-; BDVER2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [2:0.50]
-; BDVER2-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movddup:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movddup:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
-; BTVER2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
-; BTVER2-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movddup:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movddup:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
-; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
-; ZNVER1-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
-  %2 = load <2 x double>, <2 x double> *%a1, align 16
-  %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
-  %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
-  ret <2 x double> %4
-}
-
-define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_movshdup:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; GENERIC-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movshdup:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movshdup:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
-; SLM-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movshdup:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movshdup:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SANDY-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movshdup:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movshdup:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movshdup:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movshdup:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
-; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movshdup:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movshdup:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movshdup:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movshdup:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SKX-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movshdup:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movshdup:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [7:0.50]
-; BDVER2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [2:0.50]
-; BDVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movshdup:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movshdup:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
-; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
-; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movshdup:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
-; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movshdup:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
-; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
-; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-  %2 = load <4 x float>, <4 x float> *%a1, align 16
-  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-  %4 = fadd <4 x float> %1, %3
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_movsldup:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; GENERIC-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movsldup:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movsldup:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
-; SLM-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movsldup:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movsldup:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SANDY-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movsldup:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movsldup:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movsldup:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
-; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movsldup:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
-; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movsldup:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movsldup:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movsldup:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movsldup:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SKX-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movsldup:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [2:0.50]
-; BDVER2-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [7:0.50]
-; BDVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movsldup:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [7:0.50]
-; BDVER2-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [2:0.50]
-; BDVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movsldup:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
-; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movsldup:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
-; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
-; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movsldup:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movsldup:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
-; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
-; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  %2 = load <4 x float>, <4 x float> *%a1, align 16
-  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  %4 = fadd <4 x float> %1, %3
-  ret <4 x float> %4
-}
-
-define void @test_mwait(i32 %a0, i32 %a1) {
-; GENERIC-LABEL: test_mwait:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    mwait # sched: [100:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mwait:
-; ATOM:       # %bb.0:
-; ATOM-NEXT:    movl %esi, %eax # sched: [1:0.50]
-; ATOM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
-; ATOM-NEXT:    mwait # sched: [46:23.00]
-; ATOM-NEXT:    retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mwait:
-; SLM:       # %bb.0:
-; SLM-NEXT:    movl %esi, %eax # sched: [1:0.50]
-; SLM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
-; SLM-NEXT:    mwait # sched: [100:1.00]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mwait:
-; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movl %esi, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.33]
-; SANDY-SSE-NEXT:    mwait # sched: [100:0.33]
-; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mwait:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    movl %esi, %eax # sched: [1:0.33]
-; SANDY-NEXT:    movl %edi, %ecx # sched: [1:0.33]
-; SANDY-NEXT:    mwait # sched: [100:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mwait:
-; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; HASWELL-SSE-NEXT:    mwait # sched: [20:2.50]
-; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mwait:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; HASWELL-NEXT:    mwait # sched: [20:2.50]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mwait:
-; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; BROADWELL-SSE-NEXT:    mwait # sched: [100:0.25]
-; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mwait:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; BROADWELL-NEXT:    mwait # sched: [100:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mwait:
-; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT:    mwait # sched: [20:2.50]
-; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mwait:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; SKYLAKE-NEXT:    mwait # sched: [20:2.50]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mwait:
-; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; SKX-SSE-NEXT:    mwait # sched: [20:2.50]
-; SKX-SSE-NEXT:    retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mwait:
-; SKX:       # %bb.0:
-; SKX-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; SKX-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; SKX-NEXT:    mwait # sched: [20:2.50]
-; SKX-NEXT:    retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mwait:
-; BDVER2-SSE:       # %bb.0:
-; BDVER2-SSE-NEXT:    movl %esi, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
-; BDVER2-SSE-NEXT:    mwait # sched: [100:0.50]
-; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mwait:
-; BDVER2:       # %bb.0:
-; BDVER2-NEXT:    movl %esi, %eax # sched: [1:0.50]
-; BDVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
-; BDVER2-NEXT:    mwait # sched: [100:0.50]
-; BDVER2-NEXT:    retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mwait:
-; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movl %esi, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    mwait # sched: [100:0.50]
-; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mwait:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    movl %esi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    mwait # sched: [100:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mwait:
-; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    mwait # sched: [100:0.25]
-; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mwait:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT:    mwait # sched: [100:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
-  ret void
-}
-declare void @llvm.x86.sse3.mwait(i32, i32)




More information about the llvm-commits mailing list