[llvm] r328423 - [X86][SSE] Ensure we're testing both non-VEX/VEX variants of SSE instructions on AVX targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 24 07:51:52 PDT 2018
Author: rksimon
Date: Sat Mar 24 07:51:52 2018
New Revision: 328423
URL: http://llvm.org/viewvc/llvm-project?rev=328423&view=rev
Log:
[X86][SSE] Ensure we're testing both non-VEX/VEX variants of SSE instructions on AVX targets
Also ensure that the SSE schedule tests don't use later instruction sets.
Modified:
llvm/trunk/test/CodeGen/X86/sse-schedule.ll
llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,15 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
+
+; FIXME: we should really use -mattr=-sse2 here but some of the comparison tests don't work without access to legal <4 x i32> types.
define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_addps:
@@ -30,42 +40,84 @@ define <4 x float> @test_addps(<4 x floa
; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_addps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_addps:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_addps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_addps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_addps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: addps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_addps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_addps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_addps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_addps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_addps:
; SKX: # %bb.0:
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_addps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_addps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_addps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_addps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -96,42 +148,84 @@ define float @test_addss(float %a0, floa
; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_addss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_addss:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_addss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_addss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_addss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_addss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_addss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_addss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_addss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_addss:
; SKX: # %bb.0:
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_addss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_addss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_addss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: addss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_addss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -166,42 +260,84 @@ define <4 x float> @test_andps(<4 x floa
; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_andps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_andps:
; SANDY: # %bb.0:
; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_andps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_andps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_andps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: andps (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_andps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_andps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_andps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_andps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_andps:
; SKX: # %bb.0:
; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_andps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_andps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_andps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_andps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -240,42 +376,84 @@ define <4 x float> @test_andnotps(<4 x f
; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_andnotps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_andnotps:
; SANDY: # %bb.0:
; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_andnotps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_andnotps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_andnotps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_andnotps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_andnotps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_andnotps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_andnotps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_andnotps:
; SKX: # %bb.0:
; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_andnotps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_andnotps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_andnotps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -315,6 +493,13 @@ define <4 x float> @test_cmpps(<4 x floa
; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cmpps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cmpps:
; SANDY: # %bb.0:
; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -322,6 +507,13 @@ define <4 x float> @test_cmpps(<4 x floa
; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cmpps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cmpps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -329,6 +521,13 @@ define <4 x float> @test_cmpps(<4 x floa
; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cmpps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cmpps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -336,6 +535,13 @@ define <4 x float> @test_cmpps(<4 x floa
; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cmpps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cmpps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
@@ -343,14 +549,27 @@ define <4 x float> @test_cmpps(<4 x floa
; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cmpps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cmpps:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cmpps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
@@ -358,6 +577,13 @@ define <4 x float> @test_cmpps(<4 x floa
; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cmpps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cmpps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -392,42 +618,84 @@ define float @test_cmpss(float %a0, floa
; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cmpss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cmpss:
; SANDY: # %bb.0:
; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cmpss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cmpss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cmpss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cmpss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cmpss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cmpss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cmpss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cmpss:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cmpss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cmpss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cmpss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cmpss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -487,6 +755,20 @@ define i32 @test_comiss(<4 x float> %a0,
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_comiss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_comiss:
; SANDY: # %bb.0:
; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -501,6 +783,20 @@ define i32 @test_comiss(<4 x float> %a0,
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_comiss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_comiss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -515,6 +811,20 @@ define i32 @test_comiss(<4 x float> %a0,
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_comiss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_comiss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -529,6 +839,20 @@ define i32 @test_comiss(<4 x float> %a0,
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_comiss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_comiss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -543,6 +867,20 @@ define i32 @test_comiss(<4 x float> %a0,
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_comiss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_comiss:
; SKX: # %bb.0:
; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -557,6 +895,20 @@ define i32 @test_comiss(<4 x float> %a0,
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_comiss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_comiss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -571,6 +923,20 @@ define i32 @test_comiss(<4 x float> %a0,
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_comiss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_comiss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -614,6 +980,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsi2ss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
+; SANDY-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsi2ss:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
@@ -621,6 +994,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsi2ss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsi2ss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -628,6 +1008,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsi2ss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsi2ss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -635,6 +1022,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsi2ss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsi2ss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -642,6 +1036,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsi2ss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsi2ss:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -649,6 +1050,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsi2ss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsi2ss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -656,6 +1064,13 @@ define float @test_cvtsi2ss(i32 %a0, i32
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsi2ss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsi2ss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -691,6 +1106,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsi2ssq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; SANDY-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsi2ssq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
@@ -698,6 +1120,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsi2ssq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; HASWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsi2ssq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
@@ -705,6 +1134,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsi2ssq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsi2ssq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
@@ -712,6 +1148,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsi2ssq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsi2ssq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
@@ -719,6 +1162,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsi2ssq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
+; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsi2ssq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
@@ -726,6 +1176,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsi2ssq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsi2ssq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -733,6 +1190,13 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsi2ssq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsi2ssq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -768,6 +1232,13 @@ define i32 @test_cvtss2si(float %a0, flo
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtss2si:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtss2si:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
@@ -775,6 +1246,13 @@ define i32 @test_cvtss2si(float %a0, flo
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtss2si:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtss2si:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
@@ -782,6 +1260,13 @@ define i32 @test_cvtss2si(float %a0, flo
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtss2si:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtss2si:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00]
@@ -789,6 +1274,13 @@ define i32 @test_cvtss2si(float %a0, flo
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtss2si:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtss2si:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
@@ -796,6 +1288,13 @@ define i32 @test_cvtss2si(float %a0, flo
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtss2si:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtss2si:
; SKX: # %bb.0:
; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
@@ -803,6 +1302,13 @@ define i32 @test_cvtss2si(float %a0, flo
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtss2si:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtss2si:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
@@ -810,6 +1316,13 @@ define i32 @test_cvtss2si(float %a0, flo
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtss2si:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtss2si:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00]
@@ -848,6 +1361,13 @@ define i64 @test_cvtss2siq(float %a0, fl
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtss2siq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtss2siq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
@@ -855,6 +1375,13 @@ define i64 @test_cvtss2siq(float %a0, fl
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtss2siq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtss2siq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
@@ -862,6 +1389,13 @@ define i64 @test_cvtss2siq(float %a0, fl
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtss2siq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtss2siq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00]
@@ -869,6 +1403,13 @@ define i64 @test_cvtss2siq(float %a0, fl
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtss2siq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtss2siq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
@@ -876,6 +1417,13 @@ define i64 @test_cvtss2siq(float %a0, fl
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtss2siq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00]
+; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtss2siq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
@@ -883,6 +1431,13 @@ define i64 @test_cvtss2siq(float %a0, fl
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtss2siq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtss2siq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
@@ -890,6 +1445,13 @@ define i64 @test_cvtss2siq(float %a0, fl
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtss2siq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtss2siq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00]
@@ -928,6 +1490,13 @@ define i32 @test_cvttss2si(float %a0, fl
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttss2si:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttss2si:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
@@ -935,6 +1504,13 @@ define i32 @test_cvttss2si(float %a0, fl
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttss2si:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttss2si:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
@@ -942,6 +1518,13 @@ define i32 @test_cvttss2si(float %a0, fl
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttss2si:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttss2si:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00]
@@ -949,6 +1532,13 @@ define i32 @test_cvttss2si(float %a0, fl
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttss2si:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttss2si:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
@@ -956,6 +1546,13 @@ define i32 @test_cvttss2si(float %a0, fl
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttss2si:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00]
+; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttss2si:
; SKX: # %bb.0:
; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
@@ -963,6 +1560,13 @@ define i32 @test_cvttss2si(float %a0, fl
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttss2si:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttss2si:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
@@ -970,6 +1574,13 @@ define i32 @test_cvttss2si(float %a0, fl
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttss2si:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttss2si:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00]
@@ -1005,6 +1616,13 @@ define i64 @test_cvttss2siq(float %a0, f
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttss2siq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttss2siq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
@@ -1012,6 +1630,13 @@ define i64 @test_cvttss2siq(float %a0, f
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttss2siq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00]
+; HASWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttss2siq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
@@ -1019,6 +1644,13 @@ define i64 @test_cvttss2siq(float %a0, f
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttss2siq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttss2siq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00]
@@ -1026,6 +1658,13 @@ define i64 @test_cvttss2siq(float %a0, f
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttss2siq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
+; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttss2siq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
@@ -1033,6 +1672,13 @@ define i64 @test_cvttss2siq(float %a0, f
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttss2siq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00]
+; SKX-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
+; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttss2siq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
@@ -1040,6 +1686,13 @@ define i64 @test_cvttss2siq(float %a0, f
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttss2siq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttss2siq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
@@ -1047,6 +1700,13 @@ define i64 @test_cvttss2siq(float %a0, f
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttss2siq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttss2siq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00]
@@ -1079,42 +1739,84 @@ define <4 x float> @test_divps(<4 x floa
; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_divps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:1.00]
+; SANDY-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_divps:
; SANDY: # %bb.0:
; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_divps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_divps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_divps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00]
+; BROADWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [16:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_divps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_divps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_divps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_divps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_divps:
; SKX: # %bb.0:
; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_divps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_divps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_divps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: divps %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT: divps (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_divps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -1145,42 +1847,84 @@ define float @test_divss(float %a0, floa
; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_divss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:1.00]
+; SANDY-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_divss:
; SANDY: # %bb.0:
; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_divss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_divss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [18:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_divss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00]
+; BROADWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_divss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_divss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_divss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_divss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_divss:
; SKX: # %bb.0:
; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_divss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_divss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_divss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: divss %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT: divss (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_divss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -1211,42 +1955,84 @@ define void @test_ldmxcsr(i32 %a0) {
; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_ldmxcsr:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_ldmxcsr:
; SANDY: # %bb.0:
; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_ldmxcsr:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_ldmxcsr:
; HASWELL: # %bb.0:
; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_ldmxcsr:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_ldmxcsr:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_ldmxcsr:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_ldmxcsr:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_ldmxcsr:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SKX-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_ldmxcsr:
; SKX: # %bb.0:
; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_ldmxcsr:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_ldmxcsr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_ldmxcsr:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_ldmxcsr:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
@@ -1279,42 +2065,84 @@ define <4 x float> @test_maxps(<4 x floa
; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_maxps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_maxps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_maxps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_maxps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_maxps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_maxps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_maxps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_maxps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_maxps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_maxps:
; SKX: # %bb.0:
; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_maxps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_maxps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_maxps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_maxps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1346,42 +2174,84 @@ define <4 x float> @test_maxss(<4 x floa
; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_maxss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_maxss:
; SANDY: # %bb.0:
; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_maxss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_maxss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_maxss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_maxss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_maxss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_maxss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_maxss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_maxss:
; SKX: # %bb.0:
; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_maxss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_maxss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_maxss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_maxss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1413,42 +2283,84 @@ define <4 x float> @test_minps(<4 x floa
; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_minps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_minps:
; SANDY: # %bb.0:
; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_minps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_minps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_minps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_minps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_minps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_minps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_minps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_minps:
; SKX: # %bb.0:
; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_minps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_minps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_minps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_minps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1480,42 +2392,84 @@ define <4 x float> @test_minss(<4 x floa
; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_minss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_minss:
; SANDY: # %bb.0:
; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_minss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_minss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_minss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_minss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_minss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_minss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_minss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_minss:
; SKX: # %bb.0:
; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_minss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_minss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_minss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: minss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_minss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1550,6 +2504,13 @@ define void @test_movaps(<4 x float> *%a
; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movaps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movaps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
@@ -1557,6 +2518,13 @@ define void @test_movaps(<4 x float> *%a
; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movaps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movaps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
@@ -1564,6 +2532,13 @@ define void @test_movaps(<4 x float> *%a
; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movaps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movaps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50]
@@ -1571,6 +2546,13 @@ define void @test_movaps(<4 x float> *%a
; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movaps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movaps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
@@ -1578,6 +2560,13 @@ define void @test_movaps(<4 x float> *%a
; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movaps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movaps:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
@@ -1585,6 +2574,13 @@ define void @test_movaps(<4 x float> *%a
; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movaps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movaps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
@@ -1592,6 +2588,13 @@ define void @test_movaps(<4 x float> *%a
; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movaps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movaps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50]
@@ -1628,36 +2631,71 @@ define <4 x float> @test_movhlps(<4 x fl
; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movhlps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movhlps:
; SANDY: # %bb.0:
; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movhlps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movhlps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movhlps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movhlps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movhlps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movhlps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movhlps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movhlps:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movhlps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movhlps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movhlps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movhlps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
@@ -1689,9 +2727,18 @@ define void @test_movhps(<4 x float> %a0
; SLM: # %bb.0:
; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
+; SLM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movhps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movhps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
@@ -1699,6 +2746,14 @@ define void @test_movhps(<4 x float> %a0
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movhps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movhps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1706,6 +2761,14 @@ define void @test_movhps(<4 x float> %a0
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movhps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movhps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1713,6 +2776,14 @@ define void @test_movhps(<4 x float> %a0
; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movhps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movhps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1720,6 +2791,14 @@ define void @test_movhps(<4 x float> %a0
; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movhps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movhps:
; SKX: # %bb.0:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1727,6 +2806,14 @@ define void @test_movhps(<4 x float> %a0
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movhps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movhps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -1734,6 +2821,14 @@ define void @test_movhps(<4 x float> %a0
; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movhps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movhps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
@@ -1771,42 +2866,84 @@ define <4 x float> @test_movlhps(<4 x fl
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movlhps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movlhps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movlhps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movlhps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movlhps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movlhps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movlhps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movlhps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movlhps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movlhps:
; SKX: # %bb.0:
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movlhps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movlhps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movlhps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movlhps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -1839,6 +2976,13 @@ define void @test_movlps(<4 x float> %a0
; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movlps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movlps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
@@ -1846,6 +2990,13 @@ define void @test_movlps(<4 x float> %a0
; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movlps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movlps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1853,6 +3004,13 @@ define void @test_movlps(<4 x float> %a0
; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movlps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movlps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1860,6 +3018,13 @@ define void @test_movlps(<4 x float> %a0
; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movlps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movlps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1867,6 +3032,13 @@ define void @test_movlps(<4 x float> %a0
; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movlps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movlps:
; SKX: # %bb.0:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1874,6 +3046,13 @@ define void @test_movlps(<4 x float> %a0
; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movlps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movlps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -1881,6 +3060,13 @@ define void @test_movlps(<4 x float> %a0
; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movlps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movlps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
@@ -1915,36 +3101,71 @@ define i32 @test_movmskps(<4 x float> %a
; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movmskps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movmskps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movmskps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movmskps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movmskps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movmskps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movmskps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movmskps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movmskps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movmskps:
; SKX: # %bb.0:
; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movmskps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movmskps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movmskps %xmm0, %eax # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movmskps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00]
@@ -1976,36 +3197,71 @@ define void @test_movntps(<4 x float> %a
; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movntps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movntps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movntps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movntps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movntps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movntps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movntps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movntps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movntps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movntps:
; SKX: # %bb.0:
; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movntps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movntps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movntps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movntps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50]
@@ -2036,6 +3292,13 @@ define void @test_movss_mem(float* %a0,
; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movss_mem:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movss_mem:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
@@ -2043,6 +3306,13 @@ define void @test_movss_mem(float* %a0,
; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movss_mem:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movss_mem:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
@@ -2050,6 +3320,13 @@ define void @test_movss_mem(float* %a0,
; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movss_mem:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movss_mem:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
@@ -2057,6 +3334,13 @@ define void @test_movss_mem(float* %a0,
; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movss_mem:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movss_mem:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
@@ -2064,6 +3348,13 @@ define void @test_movss_mem(float* %a0,
; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movss_mem:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movss_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
@@ -2071,6 +3362,13 @@ define void @test_movss_mem(float* %a0,
; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movss_mem:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movss_mem:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2078,6 +3376,13 @@ define void @test_movss_mem(float* %a0,
; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movss_mem:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movss_mem:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2109,39 +3414,74 @@ define <4 x float> @test_movss_reg(<4 x
;
; SLM-LABEL: test_movss_reg:
; SLM: # %bb.0:
-; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; SLM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movss_reg:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movss_reg:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movss_reg:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movss_reg:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movss_reg:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movss_reg:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movss_reg:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movss_reg:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movss_reg:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movss_reg:
; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
+; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movss_reg:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movss_reg:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movss_reg:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movss_reg:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
@@ -2172,6 +3512,13 @@ define void @test_movups(<4 x float> *%a
; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movups:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movups:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -2179,6 +3526,13 @@ define void @test_movups(<4 x float> *%a
; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movups:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movups:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -2186,6 +3540,13 @@ define void @test_movups(<4 x float> *%a
; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movups:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movups:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50]
@@ -2193,6 +3554,13 @@ define void @test_movups(<4 x float> *%a
; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movups:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movups:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -2200,6 +3568,13 @@ define void @test_movups(<4 x float> *%a
; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movups:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movups:
; SKX: # %bb.0:
; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -2207,6 +3582,13 @@ define void @test_movups(<4 x float> *%a
; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movups:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movups:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
@@ -2214,6 +3596,13 @@ define void @test_movups(<4 x float> *%a
; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movups:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movups (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movups:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50]
@@ -2245,42 +3634,84 @@ define <4 x float> @test_mulps(<4 x floa
; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mulps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mulps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mulps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mulps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mulps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mulps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mulps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mulps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mulps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mulps:
; SKX: # %bb.0:
; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mulps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mulps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mulps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mulps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -2311,42 +3742,84 @@ define float @test_mulss(float %a0, floa
; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mulss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mulss:
; SANDY: # %bb.0:
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mulss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mulss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mulss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mulss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mulss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mulss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mulss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mulss:
; SKX: # %bb.0:
; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mulss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mulss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mulss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mulss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -2381,42 +3854,84 @@ define <4 x float> @test_orps(<4 x float
; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_orps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_orps:
; SANDY: # %bb.0:
; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_orps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_orps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_orps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_orps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_orps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_orps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_orps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_orps:
; SKX: # %bb.0:
; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_orps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_orps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_orps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_orps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -2463,6 +3978,16 @@ define void @test_prefetch(i8* %a0) opts
; SLM-NEXT: #NO_APP
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_prefetch:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: #APP
+; SANDY-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; SANDY-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; SANDY-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; SANDY-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; SANDY-SSE-NEXT: #NO_APP
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_prefetch:
; SANDY: # %bb.0:
; SANDY-NEXT: #APP
@@ -2473,6 +3998,16 @@ define void @test_prefetch(i8* %a0) opts
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_prefetch:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: #APP
+; HASWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; HASWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; HASWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; HASWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; HASWELL-SSE-NEXT: #NO_APP
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_prefetch:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2483,6 +4018,16 @@ define void @test_prefetch(i8* %a0) opts
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_prefetch:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: #APP
+; BROADWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: #NO_APP
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_prefetch:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
@@ -2493,6 +4038,16 @@ define void @test_prefetch(i8* %a0) opts
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_prefetch:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: #APP
+; SKYLAKE-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: #NO_APP
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_prefetch:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
@@ -2503,6 +4058,16 @@ define void @test_prefetch(i8* %a0) opts
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_prefetch:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: #APP
+; SKX-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; SKX-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; SKX-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; SKX-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; SKX-SSE-NEXT: #NO_APP
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_prefetch:
; SKX: # %bb.0:
; SKX-NEXT: #APP
@@ -2513,6 +4078,16 @@ define void @test_prefetch(i8* %a0) opts
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_prefetch:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: #APP
+; BTVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT: #NO_APP
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_prefetch:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2523,6 +4098,16 @@ define void @test_prefetch(i8* %a0) opts
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_prefetch:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: #APP
+; ZNVER1-SSE-NEXT: prefetchnta (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: prefetcht0 (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: prefetcht1 (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: prefetcht2 (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: #NO_APP
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_prefetch:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
@@ -2560,6 +4145,13 @@ define <4 x float> @test_rcpps(<4 x floa
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_rcpps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_rcpps:
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2567,6 +4159,13 @@ define <4 x float> @test_rcpps(<4 x floa
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_rcpps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_rcpps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2574,6 +4173,13 @@ define <4 x float> @test_rcpps(<4 x floa
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_rcpps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_rcpps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2581,6 +4187,13 @@ define <4 x float> @test_rcpps(<4 x floa
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_rcpps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_rcpps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
@@ -2588,6 +4201,13 @@ define <4 x float> @test_rcpps(<4 x floa
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_rcpps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_rcpps:
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
@@ -2595,6 +4215,13 @@ define <4 x float> @test_rcpps(<4 x floa
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_rcpps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
@@ -2602,6 +4229,13 @@ define <4 x float> @test_rcpps(<4 x floa
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_rcpps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:0.50]
+; ZNVER1-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [12:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_rcpps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50]
@@ -2643,6 +4277,14 @@ define <4 x float> @test_rcpss(float %a0
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_rcpss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_rcpss:
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2651,6 +4293,14 @@ define <4 x float> @test_rcpss(float %a0
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_rcpss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_rcpss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2659,6 +4309,14 @@ define <4 x float> @test_rcpss(float %a0
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_rcpss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_rcpss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2667,6 +4325,14 @@ define <4 x float> @test_rcpss(float %a0
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_rcpss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_rcpss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2675,6 +4341,14 @@ define <4 x float> @test_rcpss(float %a0
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_rcpss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00]
+; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_rcpss:
; SKX: # %bb.0:
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2683,6 +4357,14 @@ define <4 x float> @test_rcpss(float %a0
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_rcpss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_rcpss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2691,6 +4373,14 @@ define <4 x float> @test_rcpss(float %a0
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_rcpss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [12:0.50]
+; ZNVER1-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [12:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_rcpss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2732,6 +4422,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_rsqrtps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_rsqrtps:
; SANDY: # %bb.0:
; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2739,6 +4436,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_rsqrtps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_rsqrtps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2746,6 +4450,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_rsqrtps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_rsqrtps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
@@ -2753,6 +4464,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_rsqrtps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_rsqrtps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
@@ -2760,6 +4478,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_rsqrtps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_rsqrtps:
; SKX: # %bb.0:
; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
@@ -2767,6 +4492,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_rsqrtps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
@@ -2774,6 +4506,13 @@ define <4 x float> @test_rsqrtps(<4 x fl
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_rsqrtps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:0.50]
+; ZNVER1-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [12:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_rsqrtps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
@@ -2815,6 +4554,14 @@ define <4 x float> @test_rsqrtss(float %
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_rsqrtss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_rsqrtss:
; SANDY: # %bb.0:
; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2823,6 +4570,14 @@ define <4 x float> @test_rsqrtss(float %
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_rsqrtss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_rsqrtss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2831,6 +4586,14 @@ define <4 x float> @test_rsqrtss(float %
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_rsqrtss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_rsqrtss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -2839,6 +4602,14 @@ define <4 x float> @test_rsqrtss(float %
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_rsqrtss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_rsqrtss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2847,6 +4618,14 @@ define <4 x float> @test_rsqrtss(float %
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_rsqrtss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
+; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_rsqrtss:
; SKX: # %bb.0:
; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2855,6 +4634,14 @@ define <4 x float> @test_rsqrtss(float %
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_rsqrtss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_rsqrtss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2863,6 +4650,14 @@ define <4 x float> @test_rsqrtss(float %
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_rsqrtss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:0.50]
+; ZNVER1-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_rsqrtss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2902,36 +4697,71 @@ define void @test_sfence() {
; SLM-NEXT: sfence # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_sfence:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: sfence # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_sfence:
; SANDY: # %bb.0:
; SANDY-NEXT: sfence # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_sfence:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: sfence # sched: [2:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_sfence:
; HASWELL: # %bb.0:
; HASWELL-NEXT: sfence # sched: [2:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_sfence:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: sfence # sched: [2:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_sfence:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: sfence # sched: [2:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_sfence:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: sfence # sched: [2:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_sfence:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: sfence # sched: [2:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_sfence:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: sfence # sched: [2:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_sfence:
; SKX: # %bb.0:
; SKX-NEXT: sfence # sched: [2:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_sfence:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: sfence # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_sfence:
; BTVER2: # %bb.0:
; BTVER2-NEXT: sfence # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_sfence:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: sfence # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_sfence:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: sfence # sched: [1:0.50]
@@ -2963,6 +4793,13 @@ define <4 x float> @test_shufps(<4 x flo
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_shufps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SANDY-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_shufps:
; SANDY: # %bb.0:
; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2970,6 +4807,13 @@ define <4 x float> @test_shufps(<4 x flo
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_shufps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_shufps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2977,6 +4821,13 @@ define <4 x float> @test_shufps(<4 x flo
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_shufps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_shufps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2984,6 +4835,13 @@ define <4 x float> @test_shufps(<4 x flo
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_shufps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_shufps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2991,6 +4849,13 @@ define <4 x float> @test_shufps(<4 x flo
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_shufps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_shufps:
; SKX: # %bb.0:
; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
@@ -2998,6 +4863,13 @@ define <4 x float> @test_shufps(<4 x flo
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_shufps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_shufps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
@@ -3005,6 +4877,13 @@ define <4 x float> @test_shufps(<4 x flo
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_shufps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_shufps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
@@ -3041,6 +4920,13 @@ define <4 x float> @test_sqrtps(<4 x flo
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_sqrtps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:1.00]
+; SANDY-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_sqrtps:
; SANDY: # %bb.0:
; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
@@ -3048,6 +4934,13 @@ define <4 x float> @test_sqrtps(<4 x flo
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_sqrtps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00]
+; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [19:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_sqrtps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
@@ -3055,6 +4948,13 @@ define <4 x float> @test_sqrtps(<4 x flo
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_sqrtps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00]
+; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_sqrtps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
@@ -3062,6 +4962,13 @@ define <4 x float> @test_sqrtps(<4 x flo
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_sqrtps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00]
+; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [19:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_sqrtps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
@@ -3069,6 +4976,13 @@ define <4 x float> @test_sqrtps(<4 x flo
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_sqrtps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:1.00]
+; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_sqrtps:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
@@ -3076,6 +4990,13 @@ define <4 x float> @test_sqrtps(<4 x flo
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_sqrtps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [21:21.00]
+; BTVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_sqrtps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
@@ -3083,6 +5004,13 @@ define <4 x float> @test_sqrtps(<4 x flo
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_sqrtps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:1.00]
+; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_sqrtps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
@@ -3124,6 +5052,14 @@ define <4 x float> @test_sqrtss(<4 x flo
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_sqrtss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:1.00]
+; SANDY-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
+; SANDY-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_sqrtss:
; SANDY: # %bb.0:
; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
@@ -3132,6 +5068,14 @@ define <4 x float> @test_sqrtss(<4 x flo
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_sqrtss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_sqrtss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
@@ -3140,6 +5084,14 @@ define <4 x float> @test_sqrtss(<4 x flo
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_sqrtss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_sqrtss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
@@ -3148,6 +5100,14 @@ define <4 x float> @test_sqrtss(<4 x flo
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_sqrtss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_sqrtss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
@@ -3156,6 +5116,14 @@ define <4 x float> @test_sqrtss(<4 x flo
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_sqrtss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:1.00]
+; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
+; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_sqrtss:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
@@ -3164,6 +5132,14 @@ define <4 x float> @test_sqrtss(<4 x flo
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_sqrtss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_sqrtss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
@@ -3172,6 +5148,14 @@ define <4 x float> @test_sqrtss(<4 x flo
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_sqrtss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_sqrtss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50]
@@ -3206,42 +5190,84 @@ define i32 @test_stmxcsr() {
; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_stmxcsr:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_stmxcsr:
; SANDY: # %bb.0:
; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_stmxcsr:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_stmxcsr:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_stmxcsr:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_stmxcsr:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_stmxcsr:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_stmxcsr:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_stmxcsr:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; SKX-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_stmxcsr:
; SKX: # %bb.0:
; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_stmxcsr:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_stmxcsr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_stmxcsr:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
+; ZNVER1-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_stmxcsr:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
@@ -3274,42 +5300,84 @@ define <4 x float> @test_subps(<4 x floa
; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_subps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_subps:
; SANDY: # %bb.0:
; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_subps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_subps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_subps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_subps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_subps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_subps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_subps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_subps:
; SKX: # %bb.0:
; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_subps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_subps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_subps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_subps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3340,42 +5408,84 @@ define float @test_subss(float %a0, floa
; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_subss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_subss:
; SANDY: # %bb.0:
; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_subss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_subss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_subss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_subss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_subss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_subss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_subss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_subss:
; SKX: # %bb.0:
; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_subss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_subss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_subss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: subss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_subss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3430,6 +5540,20 @@ define i32 @test_ucomiss(<4 x float> %a0
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_ucomiss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_ucomiss:
; SANDY: # %bb.0:
; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -3444,6 +5568,20 @@ define i32 @test_ucomiss(<4 x float> %a0
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_ucomiss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_ucomiss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -3458,6 +5596,20 @@ define i32 @test_ucomiss(<4 x float> %a0
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_ucomiss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_ucomiss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -3472,6 +5624,20 @@ define i32 @test_ucomiss(<4 x float> %a0
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_ucomiss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_ucomiss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -3486,6 +5652,20 @@ define i32 @test_ucomiss(<4 x float> %a0
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_ucomiss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_ucomiss:
; SKX: # %bb.0:
; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
@@ -3500,6 +5680,20 @@ define i32 @test_ucomiss(<4 x float> %a0
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_ucomiss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_ucomiss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -3514,6 +5708,20 @@ define i32 @test_ucomiss(<4 x float> %a0
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_ucomiss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_ucomiss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -3557,6 +5765,13 @@ define <4 x float> @test_unpckhps(<4 x f
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_unpckhps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SANDY-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_unpckhps:
; SANDY: # %bb.0:
; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3564,6 +5779,13 @@ define <4 x float> @test_unpckhps(<4 x f
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_unpckhps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_unpckhps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3571,6 +5793,13 @@ define <4 x float> @test_unpckhps(<4 x f
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_unpckhps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_unpckhps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3578,6 +5807,13 @@ define <4 x float> @test_unpckhps(<4 x f
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_unpckhps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_unpckhps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3585,6 +5821,13 @@ define <4 x float> @test_unpckhps(<4 x f
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_unpckhps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_unpckhps:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -3592,6 +5835,13 @@ define <4 x float> @test_unpckhps(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_unpckhps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -3599,6 +5849,13 @@ define <4 x float> @test_unpckhps(<4 x f
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_unpckhps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_unpckhps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -3634,6 +5891,13 @@ define <4 x float> @test_unpcklps(<4 x f
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_unpcklps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_unpcklps:
; SANDY: # %bb.0:
; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3641,6 +5905,13 @@ define <4 x float> @test_unpcklps(<4 x f
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_unpcklps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_unpcklps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3648,6 +5919,13 @@ define <4 x float> @test_unpcklps(<4 x f
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_unpcklps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_unpcklps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3655,6 +5933,13 @@ define <4 x float> @test_unpcklps(<4 x f
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_unpcklps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_unpcklps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3662,6 +5947,13 @@ define <4 x float> @test_unpcklps(<4 x f
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_unpcklps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_unpcklps:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -3669,6 +5961,13 @@ define <4 x float> @test_unpcklps(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_unpcklps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -3676,6 +5975,13 @@ define <4 x float> @test_unpcklps(<4 x f
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_unpcklps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_unpcklps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -3712,42 +6018,84 @@ define <4 x float> @test_xorps(<4 x floa
; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_xorps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_xorps:
; SANDY: # %bb.0:
; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_xorps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_xorps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_xorps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_xorps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_xorps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_xorps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_xorps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_xorps:
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_xorps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_xorps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_xorps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_xorps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3796,6 +6144,14 @@ define <4 x float> @test_fnop() nounwind
; SLM-NEXT: #NO_APP
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_fnop:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: #APP
+; SANDY-SSE-NEXT: nop # sched: [1:?]
+; SANDY-SSE-NEXT: #NO_APP
+; SANDY-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_fnop:
; SANDY: # %bb.0:
; SANDY-NEXT: #APP
@@ -3804,6 +6160,14 @@ define <4 x float> @test_fnop() nounwind
; SANDY-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_fnop:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: #APP
+; HASWELL-SSE-NEXT: nop # sched: [1:0.25]
+; HASWELL-SSE-NEXT: #NO_APP
+; HASWELL-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_fnop:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -3812,6 +6176,14 @@ define <4 x float> @test_fnop() nounwind
; HASWELL-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_fnop:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: #APP
+; BROADWELL-SSE-NEXT: nop # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: #NO_APP
+; BROADWELL-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_fnop:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
@@ -3820,6 +6192,14 @@ define <4 x float> @test_fnop() nounwind
; BROADWELL-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_fnop:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: #APP
+; SKYLAKE-SSE-NEXT: nop # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: #NO_APP
+; SKYLAKE-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_fnop:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
@@ -3828,6 +6208,14 @@ define <4 x float> @test_fnop() nounwind
; SKYLAKE-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_fnop:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: #APP
+; SKX-SSE-NEXT: nop # sched: [1:0.25]
+; SKX-SSE-NEXT: #NO_APP
+; SKX-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_fnop:
; SKX: # %bb.0:
; SKX-NEXT: #APP
@@ -3836,6 +6224,14 @@ define <4 x float> @test_fnop() nounwind
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_fnop:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: #APP
+; BTVER2-SSE-NEXT: nop # sched: [1:0.50]
+; BTVER2-SSE-NEXT: #NO_APP
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_fnop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -3844,6 +6240,14 @@ define <4 x float> @test_fnop() nounwind
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_fnop:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: #APP
+; ZNVER1-SSE-NEXT: nop # sched: [1:?]
+; ZNVER1-SSE-NEXT: #NO_APP
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_fnop:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,15 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_addpd:
@@ -30,42 +38,84 @@ define <2 x double> @test_addpd(<2 x dou
; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_addpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_addpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_addpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_addpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_addpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_addpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_addpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_addpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_addpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_addpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_addpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_addpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_addpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_addpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -96,42 +146,84 @@ define double @test_addsd(double %a0, do
; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_addsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_addsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_addsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_addsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_addsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_addsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_addsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_addsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_addsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_addsd:
; SKX: # %bb.0:
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_addsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_addsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_addsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_addsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -165,6 +257,13 @@ define <2 x double> @test_andpd(<2 x dou
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_andpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_andpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -172,6 +271,13 @@ define <2 x double> @test_andpd(<2 x dou
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_andpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_andpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -179,6 +285,13 @@ define <2 x double> @test_andpd(<2 x dou
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_andpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_andpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -186,6 +299,13 @@ define <2 x double> @test_andpd(<2 x dou
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_andpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_andpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -193,6 +313,13 @@ define <2 x double> @test_andpd(<2 x dou
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_andpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_andpd:
; SKX: # %bb.0:
; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -200,6 +327,13 @@ define <2 x double> @test_andpd(<2 x dou
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_andpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_andpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -207,6 +341,13 @@ define <2 x double> @test_andpd(<2 x dou
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_andpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_andpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -246,6 +387,13 @@ define <2 x double> @test_andnotpd(<2 x
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_andnotpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_andnotpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -253,6 +401,13 @@ define <2 x double> @test_andnotpd(<2 x
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_andnotpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_andnotpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -260,6 +415,13 @@ define <2 x double> @test_andnotpd(<2 x
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_andnotpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_andnotpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -267,6 +429,13 @@ define <2 x double> @test_andnotpd(<2 x
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_andnotpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_andnotpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -274,6 +443,13 @@ define <2 x double> @test_andnotpd(<2 x
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_andnotpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_andnotpd:
; SKX: # %bb.0:
; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -281,6 +457,13 @@ define <2 x double> @test_andnotpd(<2 x
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_andnotpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_andnotpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -288,6 +471,13 @@ define <2 x double> @test_andnotpd(<2 x
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_andnotpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_andnotpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -329,36 +519,71 @@ define void @test_clflush(i8* %p){
; SLM-NEXT: clflush (%rdi) # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_clflush:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_clflush:
; SANDY: # %bb.0:
; SANDY-NEXT: clflush (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_clflush:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_clflush:
; HASWELL: # %bb.0:
; HASWELL-NEXT: clflush (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_clflush:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_clflush:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: clflush (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_clflush:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_clflush:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_clflush:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_clflush:
; SKX: # %bb.0:
; SKX-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_clflush:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_clflush:
; BTVER2: # %bb.0:
; BTVER2-NEXT: clflush (%rdi) # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_clflush:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: clflush (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_clflush:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: clflush (%rdi) # sched: [8:0.50]
@@ -390,6 +615,13 @@ define <2 x double> @test_cmppd(<2 x dou
; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cmppd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cmppd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -397,6 +629,13 @@ define <2 x double> @test_cmppd(<2 x dou
; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cmppd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cmppd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -404,6 +643,13 @@ define <2 x double> @test_cmppd(<2 x dou
; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cmppd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cmppd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -411,6 +657,13 @@ define <2 x double> @test_cmppd(<2 x dou
; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cmppd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cmppd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
@@ -418,14 +671,27 @@ define <2 x double> @test_cmppd(<2 x dou
; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cmppd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cmppd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
@@ -433,6 +699,13 @@ define <2 x double> @test_cmppd(<2 x dou
; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cmppd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cmppd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -467,42 +740,84 @@ define double @test_cmpsd(double %a0, do
; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cmpsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cmpsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cmpsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cmpsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cmpsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cmpsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cmpsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cmpsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cmpsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cmpsd:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cmpsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cmpsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cmpsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cmpsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -562,6 +877,20 @@ define i32 @test_comisd(<2 x double> %a0
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_comisd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_comisd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -576,6 +905,20 @@ define i32 @test_comisd(<2 x double> %a0
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_comisd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_comisd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -590,6 +933,20 @@ define i32 @test_comisd(<2 x double> %a0
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_comisd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_comisd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -604,6 +961,20 @@ define i32 @test_comisd(<2 x double> %a0
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_comisd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_comisd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -618,6 +989,20 @@ define i32 @test_comisd(<2 x double> %a0
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_comisd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_comisd:
; SKX: # %bb.0:
; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -632,6 +1017,20 @@ define i32 @test_comisd(<2 x double> %a0
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_comisd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_comisd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -646,6 +1045,20 @@ define i32 @test_comisd(<2 x double> %a0
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_comisd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_comisd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -689,6 +1102,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtdq2pd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtdq2pd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
@@ -696,6 +1116,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtdq2pd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtdq2pd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
@@ -703,6 +1130,14 @@ define <2 x double> @test_cvtdq2pd(<4 x
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtdq2pd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtdq2pd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
@@ -710,6 +1145,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtdq2pd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtdq2pd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -717,6 +1159,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtdq2pd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtdq2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -724,6 +1173,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtdq2pd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtdq2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
@@ -731,6 +1187,13 @@ define <2 x double> @test_cvtdq2pd(<4 x
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtdq2pd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtdq2pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00]
@@ -769,6 +1232,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtdq2ps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtdq2ps:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -776,6 +1246,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtdq2ps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtdq2ps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -783,6 +1260,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtdq2ps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtdq2ps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -790,6 +1274,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtdq2ps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtdq2ps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
@@ -797,6 +1288,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtdq2ps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtdq2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
@@ -804,6 +1302,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtdq2ps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtdq2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
@@ -811,6 +1316,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtdq2ps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtdq2ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00]
@@ -847,6 +1359,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtpd2dq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtpd2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -854,6 +1373,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtpd2dq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtpd2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -861,6 +1387,14 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtpd2dq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtpd2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -868,6 +1402,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtpd2dq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtpd2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -875,6 +1416,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtpd2dq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -882,6 +1430,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtpd2dq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
@@ -889,6 +1444,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtpd2dq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtpd2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
@@ -926,6 +1488,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtpd2ps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtpd2ps:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -933,6 +1502,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtpd2ps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtpd2ps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -940,6 +1516,14 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtpd2ps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtpd2ps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -947,6 +1531,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtpd2ps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtpd2ps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
@@ -954,6 +1545,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtpd2ps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtpd2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
@@ -961,6 +1559,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtpd2ps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtpd2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
@@ -968,6 +1573,13 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtpd2ps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtpd2ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00]
@@ -1005,6 +1617,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtps2dq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtps2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1012,6 +1631,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtps2dq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtps2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1019,6 +1645,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtps2dq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtps2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1026,6 +1659,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtps2dq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtps2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
@@ -1033,6 +1673,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtps2dq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33]
@@ -1040,6 +1687,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtps2dq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
@@ -1047,6 +1701,13 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtps2dq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtps2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00]
@@ -1084,6 +1745,13 @@ define <2 x double> @test_cvtps2pd(<4 x
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtps2pd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtps2pd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1091,6 +1759,13 @@ define <2 x double> @test_cvtps2pd(<4 x
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtps2pd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtps2pd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1098,6 +1773,13 @@ define <2 x double> @test_cvtps2pd(<4 x
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtps2pd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtps2pd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1105,6 +1787,13 @@ define <2 x double> @test_cvtps2pd(<4 x
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtps2pd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtps2pd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -1112,6 +1801,13 @@ define <2 x double> @test_cvtps2pd(<4 x
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtps2pd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtps2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -1119,6 +1815,13 @@ define <2 x double> @test_cvtps2pd(<4 x
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtps2pd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtps2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00]
@@ -1126,6 +1829,13 @@ define <2 x double> @test_cvtps2pd(<4 x
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtps2pd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtps2pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00]
@@ -1163,6 +1873,13 @@ define i32 @test_cvtsd2si(double %a0, do
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsd2si:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsd2si:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
@@ -1170,6 +1887,13 @@ define i32 @test_cvtsd2si(double %a0, do
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsd2si:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsd2si:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
@@ -1177,6 +1901,13 @@ define i32 @test_cvtsd2si(double %a0, do
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsd2si:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsd2si:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00]
@@ -1184,6 +1915,13 @@ define i32 @test_cvtsd2si(double %a0, do
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsd2si:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsd2si:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1191,6 +1929,13 @@ define i32 @test_cvtsd2si(double %a0, do
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsd2si:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsd2si:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1198,6 +1943,13 @@ define i32 @test_cvtsd2si(double %a0, do
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsd2si:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsd2si:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00]
@@ -1205,6 +1957,13 @@ define i32 @test_cvtsd2si(double %a0, do
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsd2si:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsd2si:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00]
@@ -1243,6 +2002,13 @@ define i64 @test_cvtsd2siq(double %a0, d
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsd2siq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsd2siq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
@@ -1250,6 +2016,13 @@ define i64 @test_cvtsd2siq(double %a0, d
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsd2siq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsd2siq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
@@ -1257,6 +2030,13 @@ define i64 @test_cvtsd2siq(double %a0, d
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsd2siq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsd2siq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00]
@@ -1264,6 +2044,13 @@ define i64 @test_cvtsd2siq(double %a0, d
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsd2siq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsd2siq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1271,6 +2058,13 @@ define i64 @test_cvtsd2siq(double %a0, d
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsd2siq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00]
+; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsd2siq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1278,6 +2072,13 @@ define i64 @test_cvtsd2siq(double %a0, d
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsd2siq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsd2siq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00]
@@ -1285,6 +2086,13 @@ define i64 @test_cvtsd2siq(double %a0, d
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsd2siq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsd2siq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00]
@@ -1327,6 +2135,14 @@ define float @test_cvtsd2ss(double %a0,
; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsd2ss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsd2ss:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1335,6 +2151,14 @@ define float @test_cvtsd2ss(double %a0,
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsd2ss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsd2ss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1343,6 +2167,14 @@ define float @test_cvtsd2ss(double %a0,
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsd2ss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsd2ss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1351,6 +2183,14 @@ define float @test_cvtsd2ss(double %a0,
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsd2ss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsd2ss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1359,6 +2199,14 @@ define float @test_cvtsd2ss(double %a0,
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsd2ss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsd2ss:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1367,6 +2215,14 @@ define float @test_cvtsd2ss(double %a0,
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsd2ss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsd2ss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
@@ -1375,6 +2231,14 @@ define float @test_cvtsd2ss(double %a0,
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsd2ss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsd2ss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
@@ -1411,6 +2275,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsi2sd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsi2sd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1418,6 +2289,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsi2sd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsi2sd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1425,6 +2303,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsi2sd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsi2sd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1432,6 +2317,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsi2sd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsi2sd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1439,6 +2331,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsi2sd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsi2sd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1446,6 +2345,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsi2sd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsi2sd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1453,6 +2359,13 @@ define double @test_cvtsi2sd(i32 %a0, i3
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsi2sd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsi2sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1488,6 +2401,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsi2sdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsi2sdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1495,6 +2415,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsi2sdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsi2sdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1502,6 +2429,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsi2sdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsi2sdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1509,6 +2443,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsi2sdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsi2sdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1516,6 +2457,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsi2sdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsi2sdq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1523,6 +2471,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsi2sdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsi2sdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1530,6 +2485,13 @@ define double @test_cvtsi2sdq(i64 %a0, i
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsi2sdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsi2sdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1571,6 +2533,14 @@ define double @test_cvtss2sd(float %a0,
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtss2sd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtss2sd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
@@ -1579,6 +2549,14 @@ define double @test_cvtss2sd(float %a0,
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtss2sd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtss2sd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1587,6 +2565,14 @@ define double @test_cvtss2sd(float %a0,
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtss2sd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtss2sd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1595,6 +2581,14 @@ define double @test_cvtss2sd(float %a0,
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtss2sd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtss2sd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1603,6 +2597,14 @@ define double @test_cvtss2sd(float %a0,
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtss2sd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtss2sd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1611,6 +2613,14 @@ define double @test_cvtss2sd(float %a0,
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtss2sd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtss2sd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -1619,6 +2629,14 @@ define double @test_cvtss2sd(float %a0,
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtss2sd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtss2sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -1656,6 +2674,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttpd2dq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttpd2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1663,6 +2688,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttpd2dq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttpd2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1670,6 +2702,14 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttpd2dq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttpd2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1677,6 +2717,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttpd2dq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttpd2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -1684,6 +2731,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttpd2dq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -1691,6 +2745,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttpd2dq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
@@ -1698,6 +2759,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttpd2dq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttpd2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
@@ -1736,6 +2804,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttps2dq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttps2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1743,6 +2818,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttps2dq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttps2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1750,6 +2832,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttps2dq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttps2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1757,6 +2846,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttps2dq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttps2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
@@ -1764,6 +2860,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttps2dq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33]
@@ -1771,6 +2874,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttps2dq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
@@ -1778,6 +2888,13 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttps2dq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttps2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00]
@@ -1813,6 +2930,13 @@ define i32 @test_cvttsd2si(double %a0, d
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttsd2si:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttsd2si:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
@@ -1820,6 +2944,13 @@ define i32 @test_cvttsd2si(double %a0, d
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttsd2si:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttsd2si:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
@@ -1827,6 +2958,13 @@ define i32 @test_cvttsd2si(double %a0, d
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttsd2si:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttsd2si:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00]
@@ -1834,6 +2972,13 @@ define i32 @test_cvttsd2si(double %a0, d
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttsd2si:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttsd2si:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1841,6 +2986,13 @@ define i32 @test_cvttsd2si(double %a0, d
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttsd2si:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttsd2si:
; SKX: # %bb.0:
; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1848,6 +3000,13 @@ define i32 @test_cvttsd2si(double %a0, d
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttsd2si:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttsd2si:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00]
@@ -1855,6 +3014,13 @@ define i32 @test_cvttsd2si(double %a0, d
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttsd2si:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttsd2si:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00]
@@ -1890,6 +3056,13 @@ define i64 @test_cvttsd2siq(double %a0,
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttsd2siq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttsd2siq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
@@ -1897,6 +3070,13 @@ define i64 @test_cvttsd2siq(double %a0,
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttsd2siq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttsd2siq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
@@ -1904,6 +3084,13 @@ define i64 @test_cvttsd2siq(double %a0,
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttsd2siq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttsd2siq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00]
@@ -1911,6 +3098,13 @@ define i64 @test_cvttsd2siq(double %a0,
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttsd2siq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttsd2siq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1918,6 +3112,13 @@ define i64 @test_cvttsd2siq(double %a0,
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttsd2siq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00]
+; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttsd2siq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1925,6 +3126,13 @@ define i64 @test_cvttsd2siq(double %a0,
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttsd2siq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttsd2siq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00]
@@ -1932,6 +3140,13 @@ define i64 @test_cvttsd2siq(double %a0,
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttsd2siq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttsd2siq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00]
@@ -1964,42 +3179,84 @@ define <2 x double> @test_divpd(<2 x dou
; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [37:34.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_divpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_divpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00]
; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_divpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
+; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_divpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00]
; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_divpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [19:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_divpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_divpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_divpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_divpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_divpd:
; SKX: # %bb.0:
; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_divpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_divpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_divpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_divpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -2030,42 +3287,84 @@ define double @test_divsd(double %a0, do
; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:34.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_divsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_divsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00]
; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_divsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
+; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_divsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00]
; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_divsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_divsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_divsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_divsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_divsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_divsd:
; SKX: # %bb.0:
; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_divsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_divsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_divsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_divsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -2099,36 +3398,71 @@ define void @test_lfence() {
; SLM-NEXT: lfence # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_lfence:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: lfence # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_lfence:
; SANDY: # %bb.0:
; SANDY-NEXT: lfence # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_lfence:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: lfence # sched: [2:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_lfence:
; HASWELL: # %bb.0:
; HASWELL-NEXT: lfence # sched: [2:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_lfence:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: lfence # sched: [2:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_lfence:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: lfence # sched: [2:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_lfence:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: lfence # sched: [2:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_lfence:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: lfence # sched: [2:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_lfence:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: lfence # sched: [2:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_lfence:
; SKX: # %bb.0:
; SKX-NEXT: lfence # sched: [2:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_lfence:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: lfence # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_lfence:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lfence # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_lfence:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: lfence # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_lfence:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: lfence # sched: [1:0.50]
@@ -2160,36 +3494,71 @@ define void @test_mfence() {
; SLM-NEXT: mfence # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mfence:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mfence # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mfence:
; SANDY: # %bb.0:
; SANDY-NEXT: mfence # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mfence:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mfence # sched: [2:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mfence:
; HASWELL: # %bb.0:
; HASWELL-NEXT: mfence # sched: [2:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mfence:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mfence # sched: [2:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mfence:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: mfence # sched: [2:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mfence:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mfence # sched: [3:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mfence:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: mfence # sched: [3:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mfence:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mfence # sched: [3:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mfence:
; SKX: # %bb.0:
; SKX-NEXT: mfence # sched: [3:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mfence:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mfence # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mfence:
; BTVER2: # %bb.0:
; BTVER2-NEXT: mfence # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mfence:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mfence # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mfence:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: mfence # sched: [1:0.50]
@@ -2219,36 +3588,71 @@ define void @test_maskmovdqu(<16 x i8> %
; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_maskmovdqu:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_maskmovdqu:
; SANDY: # %bb.0:
; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_maskmovdqu:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_maskmovdqu:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_maskmovdqu:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_maskmovdqu:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_maskmovdqu:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_maskmovdqu:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_maskmovdqu:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_maskmovdqu:
; SKX: # %bb.0:
; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_maskmovdqu:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_maskmovdqu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_maskmovdqu:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_maskmovdqu:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [100:?]
@@ -2277,42 +3681,84 @@ define <2 x double> @test_maxpd(<2 x dou
; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_maxpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_maxpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_maxpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_maxpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_maxpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_maxpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_maxpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_maxpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_maxpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_maxpd:
; SKX: # %bb.0:
; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_maxpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_maxpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_maxpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_maxpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2344,42 +3790,84 @@ define <2 x double> @test_maxsd(<2 x dou
; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_maxsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_maxsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_maxsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_maxsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_maxsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_maxsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_maxsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_maxsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_maxsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_maxsd:
; SKX: # %bb.0:
; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_maxsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_maxsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_maxsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_maxsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2411,42 +3899,84 @@ define <2 x double> @test_minpd(<2 x dou
; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_minpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_minpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_minpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_minpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_minpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_minpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_minpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_minpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_minpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_minpd:
; SKX: # %bb.0:
; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_minpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_minpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_minpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_minpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2478,42 +4008,84 @@ define <2 x double> @test_minsd(<2 x dou
; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_minsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_minsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_minsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_minsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_minsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_minsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_minsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_minsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_minsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_minsd:
; SKX: # %bb.0:
; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_minsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_minsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_minsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_minsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2548,6 +4120,13 @@ define void @test_movapd(<2 x double> *%
; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movapd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movapd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2555,6 +4134,13 @@ define void @test_movapd(<2 x double> *%
; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movapd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movapd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2562,6 +4148,13 @@ define void @test_movapd(<2 x double> *%
; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movapd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movapd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50]
@@ -2569,6 +4162,13 @@ define void @test_movapd(<2 x double> *%
; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movapd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movapd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2576,6 +4176,13 @@ define void @test_movapd(<2 x double> *%
; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movapd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movapd:
; SKX: # %bb.0:
; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2583,6 +4190,13 @@ define void @test_movapd(<2 x double> *%
; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movapd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movapd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00]
@@ -2590,6 +4204,13 @@ define void @test_movapd(<2 x double> *%
; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movapd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movapd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovapd (%rdi), %xmm0 # sched: [8:0.50]
@@ -2624,6 +4245,13 @@ define void @test_movdqa(<2 x i64> *%a0,
; SLM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movdqa:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movdqa:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2631,6 +4259,13 @@ define void @test_movdqa(<2 x i64> *%a0,
; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movdqa:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movdqa:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2638,6 +4273,13 @@ define void @test_movdqa(<2 x i64> *%a0,
; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movdqa:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movdqa:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50]
@@ -2645,6 +4287,13 @@ define void @test_movdqa(<2 x i64> *%a0,
; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movdqa:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movdqa:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2652,6 +4301,13 @@ define void @test_movdqa(<2 x i64> *%a0,
; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movdqa:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movdqa:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2659,6 +4315,13 @@ define void @test_movdqa(<2 x i64> *%a0,
; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movdqa:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movdqa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
@@ -2666,6 +4329,13 @@ define void @test_movdqa(<2 x i64> *%a0,
; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movdqa:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movdqa:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovdqa (%rdi), %xmm0 # sched: [8:0.50]
@@ -2700,6 +4370,13 @@ define void @test_movdqu(<2 x i64> *%a0,
; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movdqu:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movdqu:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2707,6 +4384,13 @@ define void @test_movdqu(<2 x i64> *%a0,
; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movdqu:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movdqu:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2714,6 +4398,13 @@ define void @test_movdqu(<2 x i64> *%a0,
; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movdqu:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movdqu:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50]
@@ -2721,6 +4412,13 @@ define void @test_movdqu(<2 x i64> *%a0,
; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movdqu:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movdqu:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2728,6 +4426,13 @@ define void @test_movdqu(<2 x i64> *%a0,
; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movdqu:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movdqu:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2735,6 +4440,13 @@ define void @test_movdqu(<2 x i64> *%a0,
; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movdqu:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movdqu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
@@ -2742,6 +4454,13 @@ define void @test_movdqu(<2 x i64> *%a0,
; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movdqu:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movdqu:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovdqu (%rdi), %xmm0 # sched: [8:0.50]
@@ -2785,6 +4504,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2795,6 +4524,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2805,6 +4544,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2815,6 +4564,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2825,16 +4584,36 @@ define i32 @test_movd(<4 x i32> %a0, i32
; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movd:
; SKX: # %bb.0:
-; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovd %edi, %xmm2 # sched: [1:1.00]
-; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: vmovd %xmm2, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2845,6 +4624,16 @@ define i32 @test_movd(<4 x i32> %a0, i32
; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: movd %edi, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2896,6 +4685,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
; SLM-NEXT: movq %xmm2, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movd_64:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
+; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movd_64:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2906,6 +4705,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movd_64:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movd_64:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2916,6 +4725,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movd_64:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movd_64:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2926,6 +4745,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movd_64:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movd_64:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2936,16 +4765,36 @@ define i64 @test_movd_64(<2 x i64> %a0,
; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movd_64:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
+; SKX-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movd_64:
; SKX: # %bb.0:
-; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: vmovq %rdi, %xmm2 # sched: [1:1.00]
-; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; SKX-NEXT: vmovq %xmm2, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movd_64:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movd_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
@@ -2956,6 +4805,16 @@ define i64 @test_movd_64(<2 x i64> %a0,
; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movd_64:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movd_64:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
@@ -2998,6 +4857,13 @@ define void @test_movhpd(<2 x double> %a
; SLM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movhpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movhpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
@@ -3005,6 +4871,13 @@ define void @test_movhpd(<2 x double> %a
; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movhpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movhpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3012,6 +4885,13 @@ define void @test_movhpd(<2 x double> %a
; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movhpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movhpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3019,6 +4899,13 @@ define void @test_movhpd(<2 x double> %a
; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movhpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movhpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3026,6 +4913,13 @@ define void @test_movhpd(<2 x double> %a
; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movhpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movhpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3033,6 +4927,13 @@ define void @test_movhpd(<2 x double> %a
; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movhpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movhpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3040,6 +4941,13 @@ define void @test_movhpd(<2 x double> %a
; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movhpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movhpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
@@ -3077,6 +4985,13 @@ define void @test_movlpd(<2 x double> %a
; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movlpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movlpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
@@ -3084,6 +4999,13 @@ define void @test_movlpd(<2 x double> %a
; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movlpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movlpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3091,6 +5013,13 @@ define void @test_movlpd(<2 x double> %a
; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movlpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movlpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3098,6 +5027,13 @@ define void @test_movlpd(<2 x double> %a
; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movlpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movlpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3105,6 +5041,13 @@ define void @test_movlpd(<2 x double> %a
; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movlpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movlpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3112,6 +5055,13 @@ define void @test_movlpd(<2 x double> %a
; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movlpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movlpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3119,6 +5069,13 @@ define void @test_movlpd(<2 x double> %a
; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movlpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movlpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
@@ -3152,36 +5109,71 @@ define i32 @test_movmskpd(<2 x double> %
; SLM-NEXT: movmskpd %xmm0, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movmskpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movmskpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movmskpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movmskpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movmskpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movmskpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movmskpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movmskpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movmskpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movmskpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movmskpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movmskpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movmskpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movmskpd %xmm0, %eax # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movmskpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovmskpd %xmm0, %eax # sched: [1:1.00]
@@ -3212,42 +5204,84 @@ define void @test_movntdqa(<2 x i64> %a0
; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movntdqa:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movntdqa:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movntdqa:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movntdqa:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movntdqa:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movntdqa:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movntdqa:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movntdqa:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movntdqa:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movntdqa:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movntdqa:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movntdqa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movntdqa:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movntdqa:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3277,42 +5311,84 @@ define void @test_movntpd(<2 x double> %
; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movntpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movntpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movntpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movntpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movntpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movntpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movntpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movntpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movntpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movntpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movntpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movntpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movntpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movntpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3345,6 +5421,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movq_mem:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movq_mem:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
@@ -3352,6 +5435,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movq_mem:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movq_mem:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3359,6 +5449,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movq_mem:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movq_mem:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3366,6 +5463,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movq_mem:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movq_mem:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3373,6 +5477,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movq_mem:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movq_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3380,6 +5491,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movq_mem:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movq_mem:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
@@ -3387,6 +5505,13 @@ define <2 x i64> @test_movq_mem(<2 x i64
; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movq_mem:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movq_mem:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
@@ -3422,42 +5547,84 @@ define <2 x i64> @test_movq_reg(<2 x i64
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movq_reg:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:1.00]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movq_reg:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movq_reg:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movq_reg:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movq_reg:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movq_reg:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movq_reg:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movq_reg:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movq_reg:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movq_reg:
; SKX: # %bb.0:
; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movq_reg:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movq_reg:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movq_reg:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movq_reg:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
@@ -3490,6 +5657,13 @@ define void @test_movsd_mem(double* %a0,
; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movsd_mem:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movsd_mem:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
@@ -3497,6 +5671,13 @@ define void @test_movsd_mem(double* %a0,
; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movsd_mem:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movsd_mem:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3504,6 +5685,13 @@ define void @test_movsd_mem(double* %a0,
; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movsd_mem:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movsd_mem:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3511,6 +5699,13 @@ define void @test_movsd_mem(double* %a0,
; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movsd_mem:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movsd_mem:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3518,6 +5713,13 @@ define void @test_movsd_mem(double* %a0,
; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movsd_mem:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movsd_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3525,6 +5727,13 @@ define void @test_movsd_mem(double* %a0,
; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movsd_mem:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movsd_mem:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
@@ -3532,6 +5741,13 @@ define void @test_movsd_mem(double* %a0,
; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movsd_mem:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movsd_mem:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
@@ -3567,36 +5783,78 @@ define <2 x double> @test_movsd_reg(<2 x
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movsd_reg:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movsd_reg:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movsd_reg:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movsd_reg:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movsd_reg:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movsd_reg:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movsd_reg:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movsd_reg:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movsd_reg:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movsd_reg:
; SKX: # %bb.0:
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movsd_reg:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movsd_reg:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movsd_reg:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movsd_reg:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
@@ -3627,6 +5885,13 @@ define void @test_movupd(<2 x double> *%
; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movupd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movupd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3634,6 +5899,13 @@ define void @test_movupd(<2 x double> *%
; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movupd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movupd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3641,6 +5913,13 @@ define void @test_movupd(<2 x double> *%
; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movupd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movupd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50]
@@ -3648,6 +5927,13 @@ define void @test_movupd(<2 x double> *%
; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movupd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movupd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3655,6 +5941,13 @@ define void @test_movupd(<2 x double> *%
; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movupd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movupd:
; SKX: # %bb.0:
; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3662,6 +5955,13 @@ define void @test_movupd(<2 x double> *%
; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movupd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movupd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00]
@@ -3669,6 +5969,13 @@ define void @test_movupd(<2 x double> *%
; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movupd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movupd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovupd (%rdi), %xmm0 # sched: [8:0.50]
@@ -3700,42 +6007,84 @@ define <2 x double> @test_mulpd(<2 x dou
; SLM-NEXT: mulpd (%rdi), %xmm0 # sched: [8:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mulpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mulpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mulpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mulpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mulpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mulpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mulpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mulpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mulpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mulpd:
; SKX: # %bb.0:
; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mulpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00]
+; BTVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [9:2.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mulpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mulpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mulpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -3766,42 +6115,84 @@ define double @test_mulsd(double %a0, do
; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mulsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mulsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mulsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mulsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mulsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mulsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mulsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mulsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mulsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mulsd:
; SKX: # %bb.0:
; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mulsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00]
+; BTVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:2.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mulsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mulsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mulsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -3835,6 +6226,13 @@ define <2 x double> @test_orpd(<2 x doub
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_orpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_orpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3842,6 +6240,13 @@ define <2 x double> @test_orpd(<2 x doub
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_orpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_orpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3849,6 +6254,13 @@ define <2 x double> @test_orpd(<2 x doub
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_orpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_orpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3856,6 +6268,13 @@ define <2 x double> @test_orpd(<2 x doub
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_orpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_orpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -3863,6 +6282,13 @@ define <2 x double> @test_orpd(<2 x doub
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_orpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_orpd:
; SKX: # %bb.0:
; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -3870,6 +6296,13 @@ define <2 x double> @test_orpd(<2 x doub
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_orpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_orpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -3877,6 +6310,13 @@ define <2 x double> @test_orpd(<2 x doub
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_orpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_orpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3917,42 +6357,84 @@ define <8 x i16> @test_packssdw(<4 x i32
; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_packssdw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_packssdw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_packssdw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_packssdw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_packssdw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_packssdw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_packssdw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_packssdw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_packssdw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_packssdw:
; SKX: # %bb.0:
; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_packssdw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_packssdw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_packssdw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_packssdw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3989,42 +6471,84 @@ define <16 x i8> @test_packsswb(<8 x i16
; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_packsswb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_packsswb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_packsswb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_packsswb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_packsswb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_packsswb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_packsswb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_packsswb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_packsswb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_packsswb:
; SKX: # %bb.0:
; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_packsswb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_packsswb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_packsswb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_packsswb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4061,42 +6585,84 @@ define <16 x i8> @test_packuswb(<8 x i16
; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_packuswb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_packuswb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_packuswb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_packuswb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_packuswb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_packuswb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_packuswb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_packuswb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_packuswb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_packuswb:
; SKX: # %bb.0:
; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_packuswb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_packuswb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_packuswb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_packuswb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4133,42 +6699,84 @@ define <16 x i8> @test_paddb(<16 x i8> %
; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4203,42 +6811,84 @@ define <4 x i32> @test_paddd(<4 x i32> %
; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddd:
; SKX: # %bb.0:
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4269,42 +6919,84 @@ define <2 x i64> @test_paddq(<2 x i64> %
; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddq:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4339,42 +7031,84 @@ define <16 x i8> @test_paddsb(<16 x i8>
; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddsb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddsb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddsb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddsb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddsb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddsb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddsb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddsb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddsb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4410,42 +7144,84 @@ define <8 x i16> @test_paddsw(<8 x i16>
; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddsw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddsw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddsw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddsw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddsw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddsw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddsw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddsw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddsw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4481,42 +7257,84 @@ define <16 x i8> @test_paddusb(<16 x i8>
; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddusb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddusb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddusb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddusb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddusb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddusb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddusb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddusb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddusb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddusb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddusb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddusb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddusb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddusb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4552,42 +7370,84 @@ define <8 x i16> @test_paddusw(<8 x i16>
; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddusw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddusw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddusw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddusw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddusw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddusw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddusw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddusw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddusw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddusw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddusw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddusw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddusw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddusw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4623,42 +7483,84 @@ define <8 x i16> @test_paddw(<8 x i16> %
; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4692,6 +7594,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pand:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pand:
; SANDY: # %bb.0:
; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4699,6 +7608,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pand:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pand:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4706,6 +7622,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pand:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pand:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4713,6 +7636,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pand:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pand:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4720,6 +7650,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pand:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pand:
; SKX: # %bb.0:
; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4727,6 +7664,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pand:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pand:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -4734,6 +7678,13 @@ define <2 x i64> @test_pand(<2 x i64> %a
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pand:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pand:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4775,6 +7726,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pandn:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pandn:
; SANDY: # %bb.0:
; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4782,6 +7742,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pandn:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pandn:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4789,6 +7758,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pandn:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pandn:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4796,6 +7774,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pandn:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pandn:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4803,6 +7790,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pandn:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pandn:
; SKX: # %bb.0:
; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4810,6 +7806,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pandn:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pandn:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -4817,6 +7822,15 @@ define <2 x i64> @test_pandn(<2 x i64> %
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pandn:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pandn:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4855,42 +7869,84 @@ define <16 x i8> @test_pavgb(<16 x i8> %
; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pavgb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pavgb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pavgb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pavgb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pavgb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pavgb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pavgb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pavgb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pavgb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pavgb:
; SKX: # %bb.0:
; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pavgb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pavgb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pavgb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pavgb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4935,42 +7991,84 @@ define <8 x i16> @test_pavgw(<8 x i16> %
; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pavgw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pavgw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pavgw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pavgw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pavgw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pavgw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pavgw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pavgw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pavgw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pavgw:
; SKX: # %bb.0:
; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pavgw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pavgw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pavgw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pavgw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5016,6 +8114,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpeqb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpeqb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5023,6 +8128,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpeqb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpeqb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5030,6 +8142,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpeqb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpeqb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5037,6 +8156,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpeqb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpeqb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5044,14 +8170,27 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpeqb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpeqb:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpeqb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpeqb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5059,6 +8198,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpeqb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpeqb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5097,6 +8243,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpeqd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpeqd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5104,6 +8257,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpeqd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpeqd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5111,6 +8271,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpeqd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpeqd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5118,6 +8285,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpeqd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpeqd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5125,14 +8299,27 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpeqd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpeqd:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpeqd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpeqd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5140,6 +8327,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpeqd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpeqd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5178,6 +8372,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpeqw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpeqw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5185,6 +8386,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpeqw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpeqw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5192,6 +8400,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpeqw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpeqw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5199,6 +8414,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpeqw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpeqw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5206,14 +8428,27 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpeqw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpeqw:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpeqw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpeqw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5221,6 +8456,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpeqw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpeqw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5260,6 +8502,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpgtb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpgtb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5267,6 +8517,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpgtb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpgtb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5274,6 +8532,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpgtb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpgtb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5281,6 +8547,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpgtb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpgtb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5288,14 +8562,29 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpgtb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpgtb:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpgtb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpgtb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5303,6 +8592,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpgtb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpgtb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5342,6 +8639,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpgtd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpgtd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5349,6 +8654,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpgtd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpgtd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5356,6 +8669,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpgtd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpgtd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5363,6 +8684,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpgtd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpgtd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5370,14 +8699,29 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpgtd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpgtd:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpgtd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpgtd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5385,6 +8729,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpgtd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpgtd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5424,6 +8776,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpgtw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpgtw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5431,6 +8791,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpgtw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpgtw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5438,6 +8806,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpgtw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpgtw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5445,6 +8821,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpgtw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpgtw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5452,14 +8836,29 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpgtw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpgtw:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpgtw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpgtw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5467,6 +8866,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpgtw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpgtw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5500,42 +8907,84 @@ define i16 @test_pextrw(<8 x i16> %a0) {
; SLM-NEXT: # kill: def $ax killed $ax killed $eax
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: # kill: def $ax killed $ax killed $eax
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrw:
; SKX: # %bb.0:
; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:2.00]
@@ -5568,42 +9017,84 @@ define <8 x i16> @test_pinsrw(<8 x i16>
; SLM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pinsrw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pinsrw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pinsrw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pinsrw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pinsrw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pinsrw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pinsrw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pinsrw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pinsrw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pinsrw:
; SKX: # %bb.0:
; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pinsrw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pinsrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pinsrw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pinsrw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5634,42 +9125,84 @@ define <4 x i32> @test_pmaddwd(<8 x i16>
; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaddwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaddwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaddwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaddwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaddwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaddwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaddwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaddwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaddwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaddwd:
; SKX: # %bb.0:
; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaddwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaddwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaddwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaddwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -5706,42 +9239,84 @@ define <8 x i16> @test_pmaxsw(<8 x i16>
; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxsw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxsw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxsw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxsw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxsw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxsw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxsw:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxsw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxsw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5777,42 +9352,84 @@ define <16 x i8> @test_pmaxub(<16 x i8>
; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxub:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxub:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxub:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxub:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxub:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxub:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxub:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxub:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxub:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxub:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxub:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxub:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxub:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5848,42 +9465,84 @@ define <8 x i16> @test_pminsw(<8 x i16>
; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminsw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminsw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminsw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminsw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminsw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminsw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminsw:
; SKX: # %bb.0:
; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminsw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminsw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5919,42 +9578,84 @@ define <16 x i8> @test_pminub(<16 x i8>
; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminub:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminub:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminub:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminub:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminub:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminub:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminub:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminub:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminub:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminub:
; SKX: # %bb.0:
; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminub:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminub:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminub:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5985,36 +9686,71 @@ define i32 @test_pmovmskb(<16 x i8> %a0)
; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovmskb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovmskb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovmskb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovmskb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovmskb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovmskb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovmskb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovmskb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovmskb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovmskb:
; SKX: # %bb.0:
; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovmskb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovmskb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovmskb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovmskb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00]
@@ -6043,42 +9779,84 @@ define <8 x i16> @test_pmulhuw(<8 x i16>
; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmulhuw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmulhuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmulhuw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmulhuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmulhuw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmulhuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmulhuw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmulhuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmulhuw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmulhuw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmulhuw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmulhuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmulhuw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmulhuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6110,42 +9888,84 @@ define <8 x i16> @test_pmulhw(<8 x i16>
; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmulhw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmulhw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmulhw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmulhw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmulhw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmulhw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmulhw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmulhw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmulhw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmulhw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmulhw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmulhw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmulhw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmulhw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6177,42 +9997,84 @@ define <8 x i16> @test_pmullw(<8 x i16>
; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmullw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmullw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmullw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmullw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmullw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmullw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmullw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmullw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmullw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmullw:
; SKX: # %bb.0:
; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmullw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmullw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmullw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmullw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6243,42 +10105,84 @@ define <2 x i64> @test_pmuludq(<4 x i32>
; SLM-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmuludq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmuludq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmuludq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmuludq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmuludq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmuludq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmuludq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmuludq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmuludq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmuludq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmuludq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmuludq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmuludq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmuludq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6314,6 +10218,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_por:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_por:
; SANDY: # %bb.0:
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6321,6 +10232,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_por:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_por:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6328,6 +10246,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_por:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_por:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6335,6 +10260,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_por:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_por:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6342,6 +10274,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_por:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_por:
; SKX: # %bb.0:
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6349,6 +10288,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_por:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_por:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6356,6 +10302,13 @@ define <2 x i64> @test_por(<2 x i64> %a0
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_por:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: por (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_por:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -6392,42 +10345,84 @@ define <2 x i64> @test_psadbw(<16 x i8>
; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psadbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psadbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psadbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psadbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psadbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psadbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psadbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psadbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psadbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psadbw:
; SKX: # %bb.0:
; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psadbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psadbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psadbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psadbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -6465,6 +10460,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pshufd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pshufd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
@@ -6472,6 +10474,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pshufd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pshufd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6479,6 +10488,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pshufd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pshufd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6486,6 +10502,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pshufd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pshufd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6493,6 +10516,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pshufd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; SKX-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pshufd:
; SKX: # %bb.0:
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6500,6 +10530,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pshufd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pshufd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
@@ -6507,6 +10544,13 @@ define <4 x i32> @test_pshufd(<4 x i32>
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pshufd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pshufd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
@@ -6544,6 +10588,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pshufhw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pshufhw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
@@ -6551,6 +10602,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pshufhw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pshufhw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6558,6 +10616,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pshufhw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pshufhw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6565,6 +10630,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pshufhw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pshufhw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6572,6 +10644,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pshufhw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pshufhw:
; SKX: # %bb.0:
; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6579,6 +10658,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pshufhw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pshufhw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
@@ -6586,6 +10672,13 @@ define <8 x i16> @test_pshufhw(<8 x i16>
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pshufhw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pshufhw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
@@ -6623,6 +10716,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pshuflw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pshuflw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
@@ -6630,6 +10730,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pshuflw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pshuflw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6637,6 +10744,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pshuflw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pshuflw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6644,6 +10758,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pshuflw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pshuflw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6651,6 +10772,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pshuflw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pshuflw:
; SKX: # %bb.0:
; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6658,6 +10786,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pshuflw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pshuflw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
@@ -6665,6 +10800,13 @@ define <8 x i16> @test_pshuflw(<8 x i16>
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pshuflw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pshuflw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
@@ -6700,6 +10842,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pslld:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pslld:
; SANDY: # %bb.0:
; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6707,6 +10856,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pslld:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pslld:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6714,6 +10870,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pslld:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pslld:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6721,6 +10884,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pslld:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pslld:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6728,6 +10898,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pslld:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pslld:
; SKX: # %bb.0:
; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6735,6 +10912,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pslld:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pslld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6742,6 +10926,13 @@ define <4 x i32> @test_pslld(<4 x i32> %
; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pslld:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pslld:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6779,36 +10970,71 @@ define <4 x i32> @test_pslldq(<4 x i32>
; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pslldq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pslldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pslldq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pslldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pslldq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pslldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pslldq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pslldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pslldq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pslldq:
; SKX: # %bb.0:
; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pslldq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pslldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pslldq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pslldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
@@ -6839,6 +11065,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psllq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psllq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6846,6 +11079,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psllq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psllq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6853,6 +11093,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psllq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psllq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6860,6 +11107,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psllq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psllq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6867,6 +11121,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psllq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psllq:
; SKX: # %bb.0:
; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6874,6 +11135,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psllq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psllq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6881,6 +11149,13 @@ define <2 x i64> @test_psllq(<2 x i64> %
; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psllq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psllq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6918,6 +11193,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psllw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psllw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6925,6 +11207,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psllw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psllw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6932,6 +11221,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psllw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psllw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6939,6 +11235,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psllw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psllw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6946,6 +11249,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psllw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psllw:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6953,6 +11263,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psllw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psllw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6960,6 +11277,13 @@ define <8 x i16> @test_psllw(<8 x i16> %
; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psllw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psllw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6997,6 +11321,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrad:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrad:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7004,6 +11335,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrad:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrad:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7011,6 +11349,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrad:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrad:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7018,6 +11363,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrad:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrad:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7025,6 +11377,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrad:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrad:
; SKX: # %bb.0:
; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7032,6 +11391,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrad:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrad:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7039,6 +11405,13 @@ define <4 x i32> @test_psrad(<4 x i32> %
; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrad:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrad:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7076,6 +11449,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psraw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psraw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7083,6 +11463,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psraw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psraw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7090,6 +11477,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psraw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psraw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7097,6 +11491,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psraw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psraw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7104,6 +11505,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psraw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psraw:
; SKX: # %bb.0:
; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7111,6 +11519,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psraw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psraw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7118,6 +11533,13 @@ define <8 x i16> @test_psraw(<8 x i16> %
; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psraw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psraw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7155,6 +11577,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrld:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrld:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7162,6 +11591,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrld:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrld:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7169,6 +11605,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrld:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrld:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7176,6 +11619,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrld:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrld:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7183,6 +11633,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrld:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrld:
; SKX: # %bb.0:
; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7190,6 +11647,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrld:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7197,6 +11661,13 @@ define <4 x i32> @test_psrld(<4 x i32> %
; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrld:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrld:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7234,36 +11705,71 @@ define <4 x i32> @test_psrldq(<4 x i32>
; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrldq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrldq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrldq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrldq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrldq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrldq:
; SKX: # %bb.0:
; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrldq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrldq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
@@ -7294,6 +11800,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrlq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrlq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7301,6 +11814,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrlq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrlq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7308,6 +11828,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrlq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrlq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7315,6 +11842,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrlq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrlq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7322,6 +11856,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrlq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrlq:
; SKX: # %bb.0:
; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7329,6 +11870,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrlq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrlq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7336,6 +11884,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %
; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrlq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrlq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7373,6 +11928,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrlw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrlw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7380,6 +11942,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrlw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrlw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7387,6 +11956,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrlw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrlw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7394,6 +11970,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrlw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrlw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7401,6 +11984,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrlw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrlw:
; SKX: # %bb.0:
; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7408,6 +11998,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrlw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrlw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7415,6 +12012,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %
; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrlw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrlw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7453,42 +12057,84 @@ define <16 x i8> @test_psubb(<16 x i8> %
; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubb:
; SKX: # %bb.0:
; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7523,42 +12169,84 @@ define <4 x i32> @test_psubd(<4 x i32> %
; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubd:
; SKX: # %bb.0:
; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7589,42 +12277,84 @@ define <2 x i64> @test_psubq(<2 x i64> %
; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubq:
; SKX: # %bb.0:
; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7659,42 +12389,84 @@ define <16 x i8> @test_psubsb(<16 x i8>
; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubsb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubsb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubsb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubsb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubsb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubsb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubsb:
; SKX: # %bb.0:
; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubsb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubsb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7730,42 +12502,84 @@ define <8 x i16> @test_psubsw(<8 x i16>
; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubsw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubsw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubsw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubsw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubsw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubsw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubsw:
; SKX: # %bb.0:
; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubsw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubsw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7801,42 +12615,84 @@ define <16 x i8> @test_psubusb(<16 x i8>
; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubusb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubusb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubusb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubusb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubusb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubusb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubusb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubusb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubusb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubusb:
; SKX: # %bb.0:
; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubusb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubusb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubusb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubusb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7872,42 +12728,84 @@ define <8 x i16> @test_psubusw(<8 x i16>
; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubusw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubusw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubusw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubusw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubusw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubusw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubusw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubusw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubusw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubusw:
; SKX: # %bb.0:
; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubusw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubusw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubusw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubusw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7943,42 +12841,84 @@ define <8 x i16> @test_psubw(<8 x i16> %
; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubw:
; SKX: # %bb.0:
; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -8013,42 +12953,84 @@ define <16 x i8> @test_punpckhbw(<16 x i
; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckhbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckhbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckhbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckhbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckhbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckhbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckhbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckhbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckhbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckhbw:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckhbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckhbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckhbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckhbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
@@ -8084,6 +13066,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckhdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckhdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -8091,6 +13080,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckhdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckhdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8098,6 +13094,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckhdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckhdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8105,6 +13108,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckhdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckhdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8112,6 +13122,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckhdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckhdq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8119,6 +13136,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckhdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckhdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -8126,6 +13150,13 @@ define <4 x i32> @test_punpckhdq(<4 x i3
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckhdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckhdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
@@ -8161,6 +13192,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckhqdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckhqdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8168,6 +13206,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckhqdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckhqdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8175,6 +13220,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckhqdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckhqdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8182,6 +13234,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckhqdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckhqdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8189,6 +13248,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckhqdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckhqdq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8196,6 +13262,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckhqdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckhqdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8203,6 +13276,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckhqdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckhqdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
@@ -8239,42 +13319,84 @@ define <8 x i16> @test_punpckhwd(<8 x i1
; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckhwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckhwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckhwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckhwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckhwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckhwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckhwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckhwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckhwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckhwd:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckhwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckhwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckhwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckhwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
@@ -8309,42 +13431,84 @@ define <16 x i8> @test_punpcklbw(<16 x i
; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpcklbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpcklbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpcklbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpcklbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpcklbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpcklbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpcklbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpcklbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpcklbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpcklbw:
; SKX: # %bb.0:
; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpcklbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpcklbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpcklbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpcklbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
@@ -8380,6 +13544,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckldq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8387,6 +13558,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckldq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8394,6 +13572,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckldq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8401,6 +13586,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckldq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8408,6 +13600,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckldq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckldq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8415,6 +13614,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckldq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8422,6 +13628,13 @@ define <4 x i32> @test_punpckldq(<4 x i3
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckldq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
@@ -8457,6 +13670,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpcklqdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpcklqdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -8464,6 +13684,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpcklqdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpcklqdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8471,6 +13698,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpcklqdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpcklqdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8478,6 +13712,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpcklqdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpcklqdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8485,6 +13726,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpcklqdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpcklqdq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8492,6 +13740,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpcklqdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpcklqdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -8499,6 +13754,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpcklqdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpcklqdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
@@ -8535,42 +13797,84 @@ define <8 x i16> @test_punpcklwd(<8 x i1
; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpcklwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpcklwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpcklwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpcklwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpcklwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpcklwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpcklwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpcklwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpcklwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpcklwd:
; SKX: # %bb.0:
; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpcklwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpcklwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpcklwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpcklwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
@@ -8604,6 +13908,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pxor:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pxor:
; SANDY: # %bb.0:
; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8611,6 +13922,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pxor:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pxor:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8618,6 +13936,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pxor:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pxor:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8625,6 +13950,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pxor:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pxor:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8632,6 +13964,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pxor:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pxor:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8639,6 +13978,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pxor:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pxor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -8646,6 +13992,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pxor:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -8681,6 +14034,13 @@ define <2 x double> @test_shufpd(<2 x do
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_shufpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_shufpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8688,6 +14048,13 @@ define <2 x double> @test_shufpd(<2 x do
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_shufpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_shufpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8695,6 +14062,13 @@ define <2 x double> @test_shufpd(<2 x do
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_shufpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_shufpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8702,6 +14076,13 @@ define <2 x double> @test_shufpd(<2 x do
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_shufpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_shufpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8709,6 +14090,13 @@ define <2 x double> @test_shufpd(<2 x do
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_shufpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_shufpd:
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8716,6 +14104,13 @@ define <2 x double> @test_shufpd(<2 x do
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_shufpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_shufpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
@@ -8723,6 +14118,13 @@ define <2 x double> @test_shufpd(<2 x do
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_shufpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_shufpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
@@ -8759,6 +14161,13 @@ define <2 x double> @test_sqrtpd(<2 x do
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_sqrtpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00]
+; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_sqrtpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00]
@@ -8766,6 +14175,13 @@ define <2 x double> @test_sqrtpd(<2 x do
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_sqrtpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_sqrtpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
@@ -8773,6 +14189,13 @@ define <2 x double> @test_sqrtpd(<2 x do
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_sqrtpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [25:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_sqrtpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
@@ -8780,6 +14203,13 @@ define <2 x double> @test_sqrtpd(<2 x do
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_sqrtpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_sqrtpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00]
@@ -8787,6 +14217,13 @@ define <2 x double> @test_sqrtpd(<2 x do
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_sqrtpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:1.00]
+; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_sqrtpd:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00]
@@ -8794,6 +14231,13 @@ define <2 x double> @test_sqrtpd(<2 x do
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_sqrtpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
+; BTVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_sqrtpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:21.00]
@@ -8801,6 +14245,13 @@ define <2 x double> @test_sqrtpd(<2 x do
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_sqrtpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_sqrtpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00]
@@ -8842,6 +14293,14 @@ define <2 x double> @test_sqrtsd(<2 x do
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_sqrtsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_sqrtsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8850,6 +14309,14 @@ define <2 x double> @test_sqrtsd(<2 x do
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_sqrtsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_sqrtsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8858,6 +14325,14 @@ define <2 x double> @test_sqrtsd(<2 x do
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_sqrtsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_sqrtsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8866,6 +14341,14 @@ define <2 x double> @test_sqrtsd(<2 x do
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_sqrtsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_sqrtsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
@@ -8874,6 +14357,14 @@ define <2 x double> @test_sqrtsd(<2 x do
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_sqrtsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:1.00]
+; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_sqrtsd:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
@@ -8882,6 +14373,14 @@ define <2 x double> @test_sqrtsd(<2 x do
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_sqrtsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_sqrtsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00]
@@ -8890,6 +14389,14 @@ define <2 x double> @test_sqrtsd(<2 x do
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_sqrtsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_sqrtsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50]
@@ -8924,42 +14431,84 @@ define <2 x double> @test_subpd(<2 x dou
; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_subpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_subpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_subpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_subpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_subpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_subpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_subpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_subpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_subpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_subpd:
; SKX: # %bb.0:
; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_subpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_subpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_subpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_subpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -8990,42 +14539,84 @@ define double @test_subsd(double %a0, do
; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_subsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_subsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_subsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_subsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_subsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_subsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_subsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_subsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_subsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_subsd:
; SKX: # %bb.0:
; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_subsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_subsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_subsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_subsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -9080,6 +14671,20 @@ define i32 @test_ucomisd(<2 x double> %a
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_ucomisd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_ucomisd:
; SANDY: # %bb.0:
; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9094,6 +14699,20 @@ define i32 @test_ucomisd(<2 x double> %a
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_ucomisd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_ucomisd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9108,6 +14727,20 @@ define i32 @test_ucomisd(<2 x double> %a
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_ucomisd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_ucomisd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9122,6 +14755,20 @@ define i32 @test_ucomisd(<2 x double> %a
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_ucomisd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_ucomisd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9136,6 +14783,20 @@ define i32 @test_ucomisd(<2 x double> %a
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_ucomisd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_ucomisd:
; SKX: # %bb.0:
; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9150,6 +14811,20 @@ define i32 @test_ucomisd(<2 x double> %a
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_ucomisd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_ucomisd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9164,6 +14839,20 @@ define i32 @test_ucomisd(<2 x double> %a
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_ucomisd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_ucomisd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9207,6 +14896,13 @@ define <2 x double> @test_unpckhpd(<2 x
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_unpckhpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_unpckhpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9214,6 +14910,13 @@ define <2 x double> @test_unpckhpd(<2 x
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_unpckhpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_unpckhpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9221,6 +14924,13 @@ define <2 x double> @test_unpckhpd(<2 x
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_unpckhpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_unpckhpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9228,6 +14938,13 @@ define <2 x double> @test_unpckhpd(<2 x
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_unpckhpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_unpckhpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9235,6 +14952,13 @@ define <2 x double> @test_unpckhpd(<2 x
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_unpckhpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_unpckhpd:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9242,6 +14966,13 @@ define <2 x double> @test_unpckhpd(<2 x
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_unpckhpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -9249,6 +14980,13 @@ define <2 x double> @test_unpckhpd(<2 x
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_unpckhpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_unpckhpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -9290,6 +15028,15 @@ define <2 x double> @test_unpcklpd(<2 x
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_unpcklpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_unpcklpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9297,6 +15044,15 @@ define <2 x double> @test_unpcklpd(<2 x
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_unpcklpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_unpcklpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9304,6 +15060,15 @@ define <2 x double> @test_unpcklpd(<2 x
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_unpcklpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_unpcklpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9311,6 +15076,15 @@ define <2 x double> @test_unpcklpd(<2 x
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_unpcklpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_unpcklpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9318,6 +15092,15 @@ define <2 x double> @test_unpcklpd(<2 x
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_unpcklpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_unpcklpd:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9325,6 +15108,15 @@ define <2 x double> @test_unpcklpd(<2 x
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_unpcklpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -9332,6 +15124,15 @@ define <2 x double> @test_unpcklpd(<2 x
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_unpcklpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_unpcklpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -9367,6 +15168,13 @@ define <2 x double> @test_xorpd(<2 x dou
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_xorpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_xorpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9374,6 +15182,13 @@ define <2 x double> @test_xorpd(<2 x dou
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_xorpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_xorpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9381,6 +15196,13 @@ define <2 x double> @test_xorpd(<2 x dou
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_xorpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_xorpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9388,6 +15210,13 @@ define <2 x double> @test_xorpd(<2 x dou
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_xorpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_xorpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -9395,6 +15224,13 @@ define <2 x double> @test_xorpd(<2 x dou
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_xorpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_xorpd:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -9402,6 +15238,13 @@ define <2 x double> @test_xorpd(<2 x dou
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_xorpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_xorpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -9409,6 +15252,13 @@ define <2 x double> @test_xorpd(<2 x dou
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_xorpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_xorpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,15 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_addsubpd:
@@ -30,42 +38,84 @@ define <2 x double> @test_addsubpd(<2 x
; SLM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_addsubpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_addsubpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_addsubpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_addsubpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_addsubpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_addsubpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_addsubpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_addsubpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_addsubpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_addsubpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_addsubpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_addsubpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_addsubpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -97,42 +147,84 @@ define <4 x float> @test_addsubps(<4 x f
; SLM-NEXT: addsubps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_addsubps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_addsubps:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_addsubps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_addsubps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_addsubps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_addsubps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_addsubps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_addsubps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_addsubps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_addsubps:
; SKX: # %bb.0:
; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_addsubps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_addsubps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_addsubps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_addsubps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -164,42 +256,84 @@ define <2 x double> @test_haddpd(<2 x do
; SLM-NEXT: haddpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_haddpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; SANDY-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_haddpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_haddpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; HASWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_haddpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_haddpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_haddpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_haddpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_haddpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_haddpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00]
+; SKX-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_haddpd:
; SKX: # %bb.0:
; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_haddpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_haddpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_haddpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -231,42 +365,84 @@ define <4 x float> @test_haddps(<4 x flo
; SLM-NEXT: haddps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_haddps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
+; SANDY-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_haddps:
; SANDY: # %bb.0:
; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_haddps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
+; HASWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_haddps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_haddps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_haddps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_haddps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_haddps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_haddps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00]
+; SKX-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_haddps:
; SKX: # %bb.0:
; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_haddps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_haddps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_haddps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_haddps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -298,42 +474,84 @@ define <2 x double> @test_hsubpd(<2 x do
; SLM-NEXT: hsubpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_hsubpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; SANDY-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_hsubpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_hsubpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; HASWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_hsubpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_hsubpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_hsubpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_hsubpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_hsubpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_hsubpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00]
+; SKX-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_hsubpd:
; SKX: # %bb.0:
; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_hsubpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_hsubpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_hsubpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -365,42 +583,84 @@ define <4 x float> @test_hsubps(<4 x flo
; SLM-NEXT: hsubps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_hsubps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; SANDY-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_hsubps:
; SANDY: # %bb.0:
; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_hsubps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; HASWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_hsubps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_hsubps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; BROADWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_hsubps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_hsubps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00]
+; SKYLAKE-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_hsubps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_hsubps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00]
+; SKX-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_hsubps:
; SKX: # %bb.0:
; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_hsubps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_hsubps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_hsubps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -431,36 +691,71 @@ define <16 x i8> @test_lddqu(i8* %a0) {
; SLM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_lddqu:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_lddqu:
; SANDY: # %bb.0:
; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_lddqu:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_lddqu:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_lddqu:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_lddqu:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_lddqu:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_lddqu:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_lddqu:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_lddqu:
; SKX: # %bb.0:
; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_lddqu:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_lddqu:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_lddqu:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
@@ -492,6 +787,13 @@ define void @test_monitor(i8* %a0, i32 %
; SLM-NEXT: monitor # sched: [100:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_monitor:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; SANDY-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33]
+; SANDY-SSE-NEXT: monitor # sched: [100:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_monitor:
; SANDY: # %bb.0:
; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
@@ -499,6 +801,13 @@ define void @test_monitor(i8* %a0, i32 %
; SANDY-NEXT: monitor # sched: [100:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_monitor:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
+; HASWELL-SSE-NEXT: monitor # sched: [100:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_monitor:
; HASWELL: # %bb.0:
; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
@@ -506,6 +815,13 @@ define void @test_monitor(i8* %a0, i32 %
; HASWELL-NEXT: monitor # sched: [100:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_monitor:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: monitor # sched: [100:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_monitor:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
@@ -513,6 +829,13 @@ define void @test_monitor(i8* %a0, i32 %
; BROADWELL-NEXT: monitor # sched: [100:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_monitor:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: monitor # sched: [100:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_monitor:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
@@ -520,6 +843,13 @@ define void @test_monitor(i8* %a0, i32 %
; SKYLAKE-NEXT: monitor # sched: [100:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_monitor:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; SKX-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
+; SKX-SSE-NEXT: monitor # sched: [100:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_monitor:
; SKX: # %bb.0:
; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
@@ -527,6 +857,13 @@ define void @test_monitor(i8* %a0, i32 %
; SKX-NEXT: monitor # sched: [100:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_monitor:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
+; BTVER2-SSE-NEXT: monitor # sched: [100:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_monitor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
@@ -534,6 +871,13 @@ define void @test_monitor(i8* %a0, i32 %
; BTVER2-NEXT: monitor # sched: [100:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_monitor:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: monitor # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_monitor:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
@@ -569,6 +913,13 @@ define <2 x double> @test_movddup(<2 x d
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movddup:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; SANDY-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
+; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movddup:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -576,6 +927,13 @@ define <2 x double> @test_movddup(<2 x d
; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movddup:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
+; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movddup:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -583,6 +941,13 @@ define <2 x double> @test_movddup(<2 x d
; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movddup:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
+; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movddup:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -590,6 +955,13 @@ define <2 x double> @test_movddup(<2 x d
; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movddup:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movddup:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -597,6 +969,13 @@ define <2 x double> @test_movddup(<2 x d
; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movddup:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
+; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movddup:
; SKX: # %bb.0:
; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
@@ -604,6 +983,13 @@ define <2 x double> @test_movddup(<2 x d
; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movddup:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movddup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
@@ -611,6 +997,13 @@ define <2 x double> @test_movddup(<2 x d
; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movddup:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movddup:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
@@ -648,6 +1041,13 @@ define <4 x float> @test_movshdup(<4 x f
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movshdup:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movshdup:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -655,6 +1055,13 @@ define <4 x float> @test_movshdup(<4 x f
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movshdup:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movshdup:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -662,6 +1069,13 @@ define <4 x float> @test_movshdup(<4 x f
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movshdup:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movshdup:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -669,6 +1083,13 @@ define <4 x float> @test_movshdup(<4 x f
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movshdup:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movshdup:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -676,6 +1097,13 @@ define <4 x float> @test_movshdup(<4 x f
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movshdup:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movshdup:
; SKX: # %bb.0:
; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
@@ -683,6 +1111,13 @@ define <4 x float> @test_movshdup(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movshdup:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
@@ -690,6 +1125,13 @@ define <4 x float> @test_movshdup(<4 x f
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movshdup:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movshdup:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
@@ -727,6 +1169,13 @@ define <4 x float> @test_movsldup(<4 x f
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movsldup:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movsldup:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -734,6 +1183,13 @@ define <4 x float> @test_movsldup(<4 x f
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movsldup:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movsldup:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -741,6 +1197,13 @@ define <4 x float> @test_movsldup(<4 x f
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movsldup:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movsldup:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -748,6 +1211,13 @@ define <4 x float> @test_movsldup(<4 x f
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movsldup:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movsldup:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -755,6 +1225,13 @@ define <4 x float> @test_movsldup(<4 x f
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movsldup:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movsldup:
; SKX: # %bb.0:
; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
@@ -762,6 +1239,13 @@ define <4 x float> @test_movsldup(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movsldup:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
@@ -769,6 +1253,13 @@ define <4 x float> @test_movsldup(<4 x f
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movsldup:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:?]
+; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:?]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movsldup:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
@@ -804,6 +1295,13 @@ define void @test_mwait(i32 %a0, i32 %a1
; SLM-NEXT: mwait # sched: [100:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mwait:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33]
+; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: mwait # sched: [100:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mwait:
; SANDY: # %bb.0:
; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33]
@@ -811,6 +1309,13 @@ define void @test_mwait(i32 %a0, i32 %a1
; SANDY-NEXT: mwait # sched: [100:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mwait:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: mwait # sched: [20:2.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mwait:
; HASWELL: # %bb.0:
; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
@@ -818,6 +1323,13 @@ define void @test_mwait(i32 %a0, i32 %a1
; HASWELL-NEXT: mwait # sched: [20:2.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mwait:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mwait:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
@@ -825,6 +1337,13 @@ define void @test_mwait(i32 %a0, i32 %a1
; BROADWELL-NEXT: mwait # sched: [100:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mwait:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mwait:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25]
@@ -832,6 +1351,13 @@ define void @test_mwait(i32 %a0, i32 %a1
; SKYLAKE-NEXT: mwait # sched: [20:2.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mwait:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
+; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: mwait # sched: [20:2.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mwait:
; SKX: # %bb.0:
; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25]
@@ -839,6 +1365,13 @@ define void @test_mwait(i32 %a0, i32 %a1
; SKX-NEXT: mwait # sched: [20:2.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mwait:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: mwait # sched: [100:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mwait:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50]
@@ -846,6 +1379,13 @@ define void @test_mwait(i32 %a0, i32 %a1
; BTVER2-NEXT: mwait # sched: [100:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mwait:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: mwait # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mwait:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,14 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_blendpd:
@@ -25,6 +33,13 @@ define <2 x double> @test_blendpd(<2 x d
; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_blendpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_blendpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -32,6 +47,13 @@ define <2 x double> @test_blendpd(<2 x d
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_blendpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_blendpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -39,6 +61,13 @@ define <2 x double> @test_blendpd(<2 x d
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_blendpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_blendpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -46,6 +75,13 @@ define <2 x double> @test_blendpd(<2 x d
; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_blendpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_blendpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -53,14 +89,27 @@ define <2 x double> @test_blendpd(<2 x d
; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_blendpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_blendpd:
; SKX: # %bb.0:
-; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [6:0.50]
+; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
-; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00]
+; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_blendpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -68,6 +117,13 @@ define <2 x double> @test_blendpd(<2 x d
; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_blendpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_blendpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -96,6 +152,13 @@ define <4 x float> @test_blendps(<4 x fl
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_blendps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; SANDY-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_blendps:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -103,6 +166,13 @@ define <4 x float> @test_blendps(<4 x fl
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_blendps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_blendps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -110,6 +180,13 @@ define <4 x float> @test_blendps(<4 x fl
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_blendps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_blendps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -117,6 +194,13 @@ define <4 x float> @test_blendps(<4 x fl
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_blendps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_blendps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -124,6 +208,13 @@ define <4 x float> @test_blendps(<4 x fl
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_blendps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_blendps:
; SKX: # %bb.0:
; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -131,6 +222,13 @@ define <4 x float> @test_blendps(<4 x fl
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_blendps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_blendps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -138,6 +236,13 @@ define <4 x float> @test_blendps(<4 x fl
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_blendps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_blendps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -170,42 +275,105 @@ define <2 x double> @test_blendvpd(<2 x
; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_blendvpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_blendvpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_blendvpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_blendvpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_blendvpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_blendvpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_blendvpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_blendvpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_blendvpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_blendvpd:
; SKX: # %bb.0:
; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_blendvpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_blendvpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [3:0.33]
+; ZNVER1-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [11:0.67]
+; ZNVER1-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_blendvpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -237,42 +405,105 @@ define <4 x float> @test_blendvps(<4 x f
; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_blendvps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_blendvps:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_blendvps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_blendvps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_blendvps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_blendvps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_blendvps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_blendvps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_blendvps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_blendvps:
; SKX: # %bb.0:
; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_blendvps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_blendvps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [3:0.33]
+; ZNVER1-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [11:0.67]
+; ZNVER1-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_blendvps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -298,42 +529,84 @@ define <2 x double> @test_dppd(<2 x doub
; SLM-NEXT: dppd $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_dppd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_dppd:
; SANDY: # %bb.0:
; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_dppd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_dppd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_dppd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_dppd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_dppd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_dppd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_dppd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_dppd:
; SKX: # %bb.0:
; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_dppd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
+; BTVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:3.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_dppd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00]
; BTVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_dppd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_dppd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -359,42 +632,84 @@ define <4 x float> @test_dpps(<4 x float
; SLM-NEXT: dpps $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_dpps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
+; SANDY-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_dpps:
; SANDY: # %bb.0:
; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_dpps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
+; HASWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [20:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_dpps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_dpps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
+; BROADWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_dpps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_dpps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.50]
+; SKYLAKE-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_dpps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50]
; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_dpps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.33]
+; SKX-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_dpps:
; SKX: # %bb.0:
; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_dpps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BTVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [16:3.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_dpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
; BTVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_dpps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_dpps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -420,42 +735,84 @@ define i32 @test_extractps(<4 x float> %
; SLM-NEXT: extractps $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_extractps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_extractps:
; SANDY: # %bb.0:
; SANDY-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_extractps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_extractps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_extractps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_extractps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_extractps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_extractps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_extractps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_extractps:
; SKX: # %bb.0:
; SKX-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_extractps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_extractps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_extractps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:2.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_extractps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vextractps $3, %xmm0, %eax # sched: [2:2.00]
@@ -482,42 +839,84 @@ define <4 x float> @test_insertps(<4 x f
; SLM-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_insertps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_insertps:
; SANDY: # %bb.0:
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_insertps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_insertps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_insertps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_insertps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_insertps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_insertps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_insertps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_insertps:
; SKX: # %bb.0:
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_insertps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_insertps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_insertps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_insertps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
@@ -541,36 +940,71 @@ define <2 x i64> @test_movntdqa(i8* %a0)
; SLM-NEXT: movntdqa (%rdi), %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movntdqa:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movntdqa:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movntdqa:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movntdqa:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movntdqa:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movntdqa:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movntdqa:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movntdqa:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movntdqa:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movntdqa:
; SKX: # %bb.0:
; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movntdqa:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movntdqa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movntdqa:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movntdqa:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [8:0.50]
@@ -593,42 +1027,84 @@ define <8 x i16> @test_mpsadbw(<16 x i8>
; SLM-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mpsadbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mpsadbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mpsadbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
+; HASWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mpsadbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mpsadbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mpsadbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mpsadbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
+; SKYLAKE-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mpsadbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mpsadbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
+; SKX-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mpsadbw:
; SKX: # %bb.0:
; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mpsadbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00]
+; BTVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mpsadbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; BTVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mpsadbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mpsadbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -655,42 +1131,84 @@ define <8 x i16> @test_packusdw(<4 x i32
; SLM-NEXT: packusdw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_packusdw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_packusdw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_packusdw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_packusdw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_packusdw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_packusdw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_packusdw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_packusdw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_packusdw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_packusdw:
; SKX: # %bb.0:
; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_packusdw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_packusdw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_packusdw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_packusdw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -723,42 +1241,105 @@ define <16 x i8> @test_pblendvb(<16 x i8
; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pblendvb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pblendvb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pblendvb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pblendvb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pblendvb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pblendvb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pblendvb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pblendvb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pblendvb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pblendvb:
; SKX: # %bb.0:
; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pblendvb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pblendvb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pblendvb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pblendvb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -786,6 +1367,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pblendw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pblendw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
@@ -793,6 +1381,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pblendw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pblendw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -800,6 +1395,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pblendw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pblendw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -807,6 +1409,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pblendw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pblendw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -814,6 +1423,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pblendw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pblendw:
; SKX: # %bb.0:
; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -821,6 +1437,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pblendw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pblendw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
@@ -828,6 +1451,13 @@ define <8 x i16> @test_pblendw(<8 x i16>
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pblendw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
+; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pblendw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
@@ -854,42 +1484,84 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
; SLM-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpeqq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpeqq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpeqq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpeqq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpeqq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpeqq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpeqq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpeqq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpeqq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpeqq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpeqq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpeqq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpeqq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpeqq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -916,42 +1588,84 @@ define i32 @test_pextrb(<16 x i8> %a0, i
; SLM-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrb:
; SKX: # %bb.0:
; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:2.00]
@@ -979,6 +1693,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -986,6 +1707,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -993,6 +1721,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -1000,6 +1735,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
@@ -1007,6 +1749,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrd:
; SKX: # %bb.0:
; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
@@ -1014,6 +1763,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -1021,6 +1777,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i
; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1047,42 +1810,84 @@ define i64 @test_pextrq(<2 x i64> %a0, <
; SLM-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SANDY-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SKX-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrq:
; SKX: # %bb.0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50]
; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:2.00]
@@ -1107,42 +1912,84 @@ define i32 @test_pextrw(<8 x i16> %a0, i
; SLM-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrw:
; SKX: # %bb.0:
; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:2.00]
@@ -1168,42 +2015,84 @@ define <8 x i16> @test_phminposuw(<8 x i
; SLM-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_phminposuw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_phminposuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_phminposuw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_phminposuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_phminposuw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_phminposuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_phminposuw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_phminposuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_phminposuw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_phminposuw:
; SKX: # %bb.0:
; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_phminposuw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_phminposuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_phminposuw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_phminposuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
@@ -1229,42 +2118,84 @@ define <16 x i8> @test_pinsrb(<16 x i8>
; SLM-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pinsrb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pinsrb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pinsrb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pinsrb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pinsrb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pinsrb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pinsrb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pinsrb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pinsrb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pinsrb:
; SKX: # %bb.0:
; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pinsrb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pinsrb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pinsrb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pinsrb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1289,42 +2220,84 @@ define <4 x i32> @test_pinsrd(<4 x i32>
; SLM-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pinsrd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pinsrd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pinsrd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pinsrd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pinsrd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pinsrd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pinsrd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pinsrd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pinsrd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pinsrd:
; SKX: # %bb.0:
; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pinsrd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pinsrd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pinsrd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pinsrd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1351,6 +2324,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pinsrq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pinsrq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1358,6 +2338,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pinsrq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pinsrq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1365,6 +2352,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pinsrq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pinsrq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1372,6 +2366,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pinsrq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pinsrq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1379,6 +2380,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pinsrq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pinsrq:
; SKX: # %bb.0:
; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1386,6 +2394,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pinsrq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pinsrq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
@@ -1393,6 +2408,13 @@ define <2 x i64> @test_pinsrq(<2 x i64>
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pinsrq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pinsrq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50]
@@ -1419,42 +2441,84 @@ define <16 x i8> @test_pmaxsb(<16 x i8>
; SLM-NEXT: pmaxsb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxsb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxsb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxsb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxsb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxsb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxsb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxsb:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxsb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxsb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1480,42 +2544,84 @@ define <4 x i32> @test_pmaxsd(<4 x i32>
; SLM-NEXT: pmaxsd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxsd:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1541,42 +2647,84 @@ define <4 x i32> @test_pmaxud(<4 x i32>
; SLM-NEXT: pmaxud (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxud:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxud:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxud:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxud:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxud:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxud:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxud:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxud:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxud:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxud:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxud:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxud:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxud:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxud:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1602,42 +2750,84 @@ define <8 x i16> @test_pmaxuw(<8 x i16>
; SLM-NEXT: pmaxuw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxuw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxuw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxuw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxuw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxuw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxuw:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxuw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxuw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1663,42 +2853,84 @@ define <16 x i8> @test_pminsb(<16 x i8>
; SLM-NEXT: pminsb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminsb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminsb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminsb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminsb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminsb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminsb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminsb:
; SKX: # %bb.0:
; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminsb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminsb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1724,42 +2956,84 @@ define <4 x i32> @test_pminsd(<4 x i32>
; SLM-NEXT: pminsd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminsd:
; SKX: # %bb.0:
; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1785,42 +3059,84 @@ define <4 x i32> @test_pminud(<4 x i32>
; SLM-NEXT: pminud (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminud:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminud:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminud:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminud:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminud:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminud:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminud:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminud:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminud:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminud:
; SKX: # %bb.0:
; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminud:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminud:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminud:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminud:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1846,42 +3162,84 @@ define <8 x i16> @test_pminuw(<8 x i16>
; SLM-NEXT: pminuw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminuw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminuw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminuw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminuw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminuw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminuw:
; SKX: # %bb.0:
; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminuw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminuw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1910,6 +3268,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
@@ -1917,6 +3282,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1924,6 +3296,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1931,6 +3310,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1938,6 +3324,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxbw:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1945,6 +3338,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
@@ -1952,6 +3352,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50]
@@ -1982,6 +3389,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxbd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxbd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
@@ -1989,6 +3403,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxbd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxbd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -1996,6 +3417,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxbd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxbd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2003,6 +3431,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxbd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxbd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2010,6 +3445,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxbd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxbd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2017,6 +3459,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxbd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxbd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
@@ -2024,6 +3473,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxbd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxbd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50]
@@ -2054,6 +3510,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxbq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxbq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2061,6 +3524,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxbq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxbq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2068,6 +3538,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxbq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxbq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2075,6 +3552,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxbq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxbq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2082,6 +3566,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxbq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxbq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2089,6 +3580,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxbq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxbq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2096,6 +3594,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxbq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxbq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2126,6 +3631,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2133,6 +3645,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2140,6 +3659,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2147,6 +3673,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2154,6 +3687,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxdq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2161,6 +3701,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2168,6 +3715,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2198,6 +3752,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
@@ -2205,6 +3766,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2212,6 +3780,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2219,6 +3794,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2226,6 +3808,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxwd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2233,6 +3822,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
@@ -2240,6 +3836,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50]
@@ -2270,6 +3873,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxwq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxwq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2277,6 +3887,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxwq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxwq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2284,6 +3901,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxwq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxwq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2291,6 +3915,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxwq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxwq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2298,6 +3929,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxwq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxwq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2305,6 +3943,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxwq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxwq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2312,6 +3957,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxwq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxwq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2342,6 +3994,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
@@ -2349,6 +4008,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2356,6 +4022,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2363,6 +4036,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2370,6 +4050,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxbw:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2377,6 +4064,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
@@ -2384,6 +4078,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
@@ -2414,6 +4115,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxbd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxbd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
@@ -2421,6 +4129,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxbd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxbd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2428,6 +4143,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxbd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxbd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2435,6 +4157,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxbd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxbd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2442,6 +4171,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxbd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxbd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2449,6 +4185,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxbd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxbd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
@@ -2456,6 +4199,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxbd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxbd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
@@ -2486,6 +4236,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxbq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxbq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
@@ -2493,6 +4250,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxbq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxbq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2500,6 +4264,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxbq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxbq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2507,6 +4278,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxbq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxbq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2514,6 +4292,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxbq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxbq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2521,6 +4306,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxbq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxbq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
@@ -2528,6 +4320,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxbq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxbq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
@@ -2558,6 +4357,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
@@ -2565,6 +4371,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2572,6 +4385,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2579,6 +4399,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2586,6 +4413,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxdq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2593,6 +4427,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
@@ -2600,6 +4441,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50]
@@ -2630,6 +4478,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
@@ -2637,6 +4492,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2644,6 +4506,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2651,6 +4520,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2658,6 +4534,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxwd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2665,6 +4548,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
@@ -2672,6 +4562,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
@@ -2702,6 +4599,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxwq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxwq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
@@ -2709,6 +4613,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxwq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxwq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2716,6 +4627,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxwq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxwq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2723,6 +4641,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxwq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxwq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2730,6 +4655,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxwq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxwq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2737,6 +4669,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxwq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxwq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
@@ -2744,6 +4683,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxwq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxwq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
@@ -2771,42 +4717,84 @@ define <2 x i64> @test_pmuldq(<4 x i32>
; SLM-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmuldq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmuldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmuldq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmuldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmuldq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmuldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmuldq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmuldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmuldq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmuldq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmuldq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmuldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmuldq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmuldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2833,42 +4821,84 @@ define <4 x i32> @test_pmulld(<4 x i32>
; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmulld:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmulld:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmulld:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00]
+; HASWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmulld:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmulld:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [15:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmulld:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmulld:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00]
+; SKYLAKE-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmulld:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmulld:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:0.67]
+; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:0.67]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmulld:
; SKX: # %bb.0:
; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67]
; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmulld:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmulld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmulld:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmulld:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2901,6 +4931,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
; SLM-NEXT: movzbl %cl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_ptest:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: setb %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: setb %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_ptest:
; SANDY: # %bb.0:
; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2911,6 +4951,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_ptest:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: setb %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: setb %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_ptest:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2921,6 +4971,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_ptest:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: setb %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: setb %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_ptest:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2931,6 +4991,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_ptest:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: setb %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: setb %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_ptest:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2941,6 +5011,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_ptest:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-SSE-NEXT: setb %al # sched: [1:0.50]
+; SKX-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: setb %cl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_ptest:
; SKX: # %bb.0:
; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2951,6 +5031,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_ptest:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: setb %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: setb %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_ptest:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2961,6 +5051,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2
; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_ptest:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: setb %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: setb %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_ptest:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00]
@@ -2994,6 +5094,13 @@ define <2 x double> @test_roundpd(<2 x d
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_roundpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_roundpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3001,6 +5108,13 @@ define <2 x double> @test_roundpd(<2 x d
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_roundpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_roundpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
@@ -3008,6 +5122,14 @@ define <2 x double> @test_roundpd(<2 x d
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_roundpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_roundpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
@@ -3015,6 +5137,13 @@ define <2 x double> @test_roundpd(<2 x d
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_roundpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_roundpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
@@ -3022,6 +5151,13 @@ define <2 x double> @test_roundpd(<2 x d
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_roundpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_roundpd:
; SKX: # %bb.0:
; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67]
@@ -3029,6 +5165,13 @@ define <2 x double> @test_roundpd(<2 x d
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_roundpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00]
@@ -3036,6 +5179,13 @@ define <2 x double> @test_roundpd(<2 x d
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_roundpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_roundpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00]
@@ -3066,6 +5216,13 @@ define <4 x float> @test_roundps(<4 x fl
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_roundps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_roundps:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3073,6 +5230,13 @@ define <4 x float> @test_roundps(<4 x fl
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_roundps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_roundps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
@@ -3080,6 +5244,14 @@ define <4 x float> @test_roundps(<4 x fl
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_roundps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: roundps $7, (%rdi), %xmm1 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT: roundps $7, %xmm0, %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_roundps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00]
@@ -3087,6 +5259,13 @@ define <4 x float> @test_roundps(<4 x fl
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_roundps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_roundps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
@@ -3094,6 +5273,13 @@ define <4 x float> @test_roundps(<4 x fl
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_roundps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_roundps:
; SKX: # %bb.0:
; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67]
@@ -3101,6 +5287,13 @@ define <4 x float> @test_roundps(<4 x fl
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_roundps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_roundps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00]
@@ -3108,6 +5301,13 @@ define <4 x float> @test_roundps(<4 x fl
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_roundps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_roundps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:1.00]
@@ -3139,6 +5339,14 @@ define <2 x double> @test_roundsd(<2 x d
; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_roundsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; SANDY-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; SANDY-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_roundsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3146,6 +5354,14 @@ define <2 x double> @test_roundsd(<2 x d
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_roundsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_roundsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
@@ -3153,6 +5369,14 @@ define <2 x double> @test_roundsd(<2 x d
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_roundsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_roundsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
@@ -3160,6 +5384,14 @@ define <2 x double> @test_roundsd(<2 x d
; BROADWELL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_roundsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_roundsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
@@ -3167,6 +5399,14 @@ define <2 x double> @test_roundsd(<2 x d
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_roundsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:0.67]
+; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_roundsd:
; SKX: # %bb.0:
; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
@@ -3174,6 +5414,14 @@ define <2 x double> @test_roundsd(<2 x d
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_roundsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_roundsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3181,6 +5429,14 @@ define <2 x double> @test_roundsd(<2 x d
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_roundsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_roundsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
@@ -3212,6 +5468,14 @@ define <4 x float> @test_roundss(<4 x fl
; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_roundss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; SANDY-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; SANDY-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_roundss:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3219,6 +5483,14 @@ define <4 x float> @test_roundss(<4 x fl
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_roundss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_roundss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
@@ -3226,6 +5498,14 @@ define <4 x float> @test_roundss(<4 x fl
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_roundss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_roundss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
@@ -3233,6 +5513,14 @@ define <4 x float> @test_roundss(<4 x fl
; BROADWELL-NEXT: vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_roundss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_roundss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
@@ -3240,6 +5528,14 @@ define <4 x float> @test_roundss(<4 x fl
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_roundss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:0.67]
+; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_roundss:
; SKX: # %bb.0:
; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
@@ -3247,6 +5543,14 @@ define <4 x float> @test_roundss(<4 x fl
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_roundss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_roundss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3254,6 +5558,14 @@ define <4 x float> @test_roundss(<4 x fl
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_roundss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_roundss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,14 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; GENERIC-LABEL: crc32_32_8:
@@ -25,6 +33,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
; SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: crc32_32_8:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; SANDY-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: crc32_32_8:
; SANDY: # %bb.0:
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -32,6 +47,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: crc32_32_8:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; HASWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: crc32_32_8:
; HASWELL: # %bb.0:
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -39,6 +61,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: crc32_32_8:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: crc32_32_8:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -46,6 +75,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: crc32_32_8:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: crc32_32_8:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -53,6 +89,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: crc32_32_8:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; SKX-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: crc32_32_8:
; SKX: # %bb.0:
; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -60,6 +103,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: crc32_32_8:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: crc32b %sil, %edi # sched: [3:2.00]
+; BTVER2-SSE-NEXT: crc32b (%rdx), %edi # sched: [6:2.00]
+; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: crc32_32_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:2.00]
@@ -67,6 +117,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: crc32_32_8:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: crc32b (%rdx), %edi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: crc32_32_8:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -95,6 +152,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
; SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: crc32_32_16:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00]
+; SANDY-SSE-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
+; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: crc32_32_16:
; SANDY: # %bb.0:
; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00]
@@ -102,6 +166,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: crc32_32_16:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00]
+; HASWELL-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
+; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: crc32_32_16:
; HASWELL: # %bb.0:
; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00]
@@ -109,6 +180,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: crc32_32_16:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: crc32_32_16:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: crc32w %si, %edi # sched: [3:1.00]
@@ -116,6 +194,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: crc32_32_16:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: crc32_32_16:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: crc32w %si, %edi # sched: [3:1.00]
@@ -123,6 +208,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: crc32_32_16:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00]
+; SKX-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
+; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: crc32_32_16:
; SKX: # %bb.0:
; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00]
@@ -130,6 +222,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: crc32_32_16:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: crc32w %si, %edi # sched: [3:2.00]
+; BTVER2-SSE-NEXT: crc32w (%rdx), %edi # sched: [6:2.00]
+; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: crc32_32_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: crc32w %si, %edi # sched: [3:2.00]
@@ -137,6 +236,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: crc32_32_16:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: crc32w (%rdx), %edi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: crc32_32_16:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: crc32w %si, %edi # sched: [3:1.00]
@@ -165,6 +271,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
; SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: crc32_32_32:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00]
+; SANDY-SSE-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
+; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: crc32_32_32:
; SANDY: # %bb.0:
; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00]
@@ -172,6 +285,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: crc32_32_32:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00]
+; HASWELL-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
+; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: crc32_32_32:
; HASWELL: # %bb.0:
; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00]
@@ -179,6 +299,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: crc32_32_32:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: crc32_32_32:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00]
@@ -186,6 +313,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: crc32_32_32:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: crc32_32_32:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: crc32l %esi, %edi # sched: [3:1.00]
@@ -193,6 +327,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: crc32_32_32:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00]
+; SKX-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
+; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: crc32_32_32:
; SKX: # %bb.0:
; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00]
@@ -200,6 +341,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: crc32_32_32:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: crc32l %esi, %edi # sched: [3:2.00]
+; BTVER2-SSE-NEXT: crc32l (%rdx), %edi # sched: [6:2.00]
+; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: crc32_32_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: crc32l %esi, %edi # sched: [3:2.00]
@@ -207,6 +355,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: crc32_32_32:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: crc32l (%rdx), %edi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: crc32_32_32:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: crc32l %esi, %edi # sched: [3:1.00]
@@ -235,6 +390,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: crc32_64_8:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; SANDY-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: crc32_64_8:
; SANDY: # %bb.0:
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -242,6 +404,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: crc32_64_8:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; HASWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: crc32_64_8:
; HASWELL: # %bb.0:
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -249,6 +418,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: crc32_64_8:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: crc32_64_8:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -256,6 +432,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: crc32_64_8:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: crc32_64_8:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -263,6 +446,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: crc32_64_8:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; SKX-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: crc32_64_8:
; SKX: # %bb.0:
; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -270,6 +460,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: crc32_64_8:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: crc32b %sil, %edi # sched: [3:2.00]
+; BTVER2-SSE-NEXT: crc32b (%rdx), %edi # sched: [6:2.00]
+; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: crc32_64_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:2.00]
@@ -277,6 +474,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: crc32_64_8:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: crc32b (%rdx), %edi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: crc32_64_8:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00]
@@ -305,6 +509,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: crc32_64_64:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
+; SANDY-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00]
+; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: crc32_64_64:
; SANDY: # %bb.0:
; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
@@ -312,6 +523,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: crc32_64_64:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
+; HASWELL-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00]
+; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: crc32_64_64:
; HASWELL: # %bb.0:
; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
@@ -319,6 +537,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: crc32_64_64:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: crc32_64_64:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
@@ -326,6 +551,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: crc32_64_64:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: crc32_64_64:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
@@ -333,6 +565,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: crc32_64_64:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
+; SKX-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00]
+; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: crc32_64_64:
; SKX: # %bb.0:
; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
@@ -340,6 +579,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: crc32_64_64:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:2.00]
+; BTVER2-SSE-NEXT: crc32q (%rdx), %rdi # sched: [6:2.00]
+; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: crc32_64_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: crc32q %rsi, %rdi # sched: [3:2.00]
@@ -347,6 +593,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: crc32_64_64:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: crc32_64_64:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
@@ -387,6 +640,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
; SLM-NEXT: leal (%rcx,%rsi), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpestri:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
+; SANDY-SSE-NEXT: movl %ecx, %esi # sched: [1:0.33]
+; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
+; SANDY-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; SANDY-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpestri:
; SANDY: # %bb.0:
; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
@@ -400,6 +666,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpestri:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; HASWELL-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; HASWELL-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; HASWELL-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
+; HASWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; HASWELL-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpestri:
; HASWELL: # %bb.0:
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -413,6 +692,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpestri:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; BROADWELL-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [23:4.00]
+; BROADWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BROADWELL-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpestri:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -426,6 +718,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
; BROADWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpestri:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; SKYLAKE-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
+; SKYLAKE-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; SKYLAKE-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpestri:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -439,6 +744,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
; SKYLAKE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpestri:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKX-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; SKX-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
+; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKX-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
+; SKX-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; SKX-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpestri:
; SKX: # %bb.0:
; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -452,6 +770,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpestri:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [14:5.00]
+; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [19:5.00]
+; BTVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BTVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpestri:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50]
@@ -465,6 +796,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0
; BTVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpestri:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; ZNVER1-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpestri:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -506,6 +850,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
; SLM-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [17:17.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpestrm:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
+; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpestrm:
; SANDY: # %bb.0:
; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
@@ -516,6 +870,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpestrm:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; HASWELL-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; HASWELL-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpestrm:
; HASWELL: # %bb.0:
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -526,6 +890,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpestrm:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpestrm:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -536,6 +910,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
; BROADWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpestrm:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpestrm:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -546,6 +930,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
; SKYLAKE-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpestrm:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKX-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKX-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpestrm:
; SKX: # %bb.0:
; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -556,6 +950,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpestrm:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [14:5.00]
+; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpestrm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50]
@@ -566,6 +970,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i
; BTVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpestrm:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpestrm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
@@ -601,6 +1015,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
; SLM-NEXT: leal (%rcx,%rax), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpistri:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; SANDY-SSE-NEXT: movl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
+; SANDY-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; SANDY-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpistri:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
@@ -610,6 +1033,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpistri:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; HASWELL-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
+; HASWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; HASWELL-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpistri:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
@@ -619,6 +1051,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpistri:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BROADWELL-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
+; BROADWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BROADWELL-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpistri:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
@@ -628,6 +1069,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
; BROADWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpistri:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKYLAKE-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
+; SKYLAKE-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; SKYLAKE-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpistri:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
@@ -637,6 +1087,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
; SKYLAKE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpistri:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKX-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
+; SKX-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; SKX-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpistri:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
@@ -646,6 +1105,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpistri:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
+; BTVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [12:2.00]
+; BTVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BTVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpistri:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
@@ -655,6 +1123,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0
; BTVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpistri:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; ZNVER1-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpistri:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [100:?]
@@ -684,42 +1161,84 @@ define <16 x i8> @test_pcmpistrm(<16 x i
; SLM-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [13:13.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpistrm:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; SANDY-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpistrm:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpistrm:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; HASWELL-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpistrm:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpistrm:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BROADWELL-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpistrm:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; BROADWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpistrm:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKYLAKE-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpistrm:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
; SKYLAKE-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpistrm:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKX-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpistrm:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpistrm:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
+; BTVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpistrm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
; BTVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpistrm:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpistrm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [100:?]
@@ -745,42 +1264,84 @@ define <2 x i64> @test_pcmpgtq(<2 x i64>
; SLM-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpgtq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpgtq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpgtq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpgtq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpgtq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpgtq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpgtq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpgtq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpgtq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpgtq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpgtq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpgtq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpgtq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpgtq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -807,42 +1368,84 @@ define <2 x i64> @test_pclmulqdq(<2 x i6
; SLM-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [10:10.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pclmulqdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00]
+; SANDY-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pclmulqdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00]
; SANDY-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pclmulqdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [11:2.00]
+; HASWELL-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [17:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pclmulqdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [17:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pclmulqdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pclmulqdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pclmulqdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pclmulqdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pclmulqdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pclmulqdq:
; SKX: # %bb.0:
; SKX-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pclmulqdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pclmulqdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pclmulqdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pclmulqdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [100:?]
More information about the llvm-commits
mailing list