[llvm] r328423 - [X86][SSE] Ensure we're testing both non-VEX/VEX variants of SSE instructions on AVX targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 24 07:51:52 PDT 2018


Modified: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll?rev=328423&r1=328422&r2=328423&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll Sat Mar 24 07:51:52 2018
@@ -1,15 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefixes=CHECK,ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,ZNVER1
 
 define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
 ; GENERIC-LABEL: test_pabsb:
@@ -34,6 +42,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pabsb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pabsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
@@ -41,6 +56,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pabsb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pabsb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
@@ -48,6 +70,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pabsb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pabsb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
@@ -55,6 +84,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pabsb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pabsb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
@@ -62,6 +98,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pabsb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pabsb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
@@ -69,6 +112,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pabsb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pabsb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpabsb (%rdi), %xmm1 # sched: [6:1.00]
@@ -76,6 +126,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pabsb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pabsb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpabsb (%rdi), %xmm1 # sched: [8:0.50]
@@ -113,6 +170,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pabsd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pabsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
@@ -120,6 +184,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pabsd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pabsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
@@ -127,6 +198,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pabsd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pabsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
@@ -134,6 +212,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pabsd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pabsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
@@ -141,6 +226,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pabsd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pabsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
@@ -148,6 +240,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pabsd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pabsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpabsd (%rdi), %xmm1 # sched: [6:1.00]
@@ -155,6 +254,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pabsd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pabsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpabsd (%rdi), %xmm1 # sched: [8:0.50]
@@ -192,6 +298,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pabsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pabsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
@@ -199,6 +312,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pabsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pabsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
@@ -206,6 +326,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pabsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pabsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
@@ -213,6 +340,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pabsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pabsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
@@ -220,6 +354,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pabsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pabsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
@@ -227,6 +368,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pabsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pabsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpabsw (%rdi), %xmm1 # sched: [6:1.00]
@@ -234,6 +382,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pabsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pabsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpabsw (%rdi), %xmm1 # sched: [8:0.50]
@@ -272,42 +427,91 @@ define <8 x i16> @test_palignr(<8 x i16>
 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_palignr:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
+; SANDY-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
+; SANDY-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_palignr:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
 ; SANDY-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_palignr:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
+; HASWELL-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
+; HASWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_palignr:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
 ; HASWELL-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_palignr:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_palignr:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
 ; BROADWELL-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_palignr:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_palignr:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_palignr:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
+; SKX-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
+; SKX-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_palignr:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
 ; SKX-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_palignr:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
+; BTVER2-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
+; BTVER2-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_palignr:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
 ; BTVER2-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_palignr:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_palignr:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25]
@@ -338,42 +542,84 @@ define <4 x i32> @test_phaddd(<4 x i32>
 ; SLM-NEXT:    phaddd (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_phaddd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:1.50]
+; SANDY-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:1.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_phaddd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
 ; SANDY-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_phaddd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:2.00]
+; HASWELL-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_phaddd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_phaddd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:2.00]
+; BROADWELL-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [8:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_phaddd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_phaddd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:2.00]
+; SKYLAKE-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_phaddd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_phaddd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:2.00]
+; SKX-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_phaddd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKX-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_phaddd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_phaddd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_phaddd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_phaddd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -405,42 +651,84 @@ define <8 x i16> @test_phaddsw(<8 x i16>
 ; SLM-NEXT:    phaddsw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_phaddsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:1.50]
+; SANDY-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:1.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_phaddsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
 ; SANDY-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_phaddsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:2.00]
+; HASWELL-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_phaddsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_phaddsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:2.00]
+; BROADWELL-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [8:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_phaddsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_phaddsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:2.00]
+; SKYLAKE-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_phaddsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_phaddsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:2.00]
+; SKX-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_phaddsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKX-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_phaddsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_phaddsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_phaddsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_phaddsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -472,42 +760,84 @@ define <8 x i16> @test_phaddw(<8 x i16>
 ; SLM-NEXT:    phaddw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_phaddw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:1.50]
+; SANDY-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:1.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_phaddw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
 ; SANDY-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_phaddw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
+; HASWELL-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_phaddw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_phaddw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
+; BROADWELL-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [8:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_phaddw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_phaddw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
+; SKYLAKE-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_phaddw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_phaddw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
+; SKX-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_phaddw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKX-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_phaddw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_phaddw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_phaddw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_phaddw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -539,42 +869,84 @@ define <4 x i32> @test_phsubd(<4 x i32>
 ; SLM-NEXT:    phsubd (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_phsubd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:1.50]
+; SANDY-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:1.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_phsubd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
 ; SANDY-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_phsubd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
+; HASWELL-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_phsubd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_phsubd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
+; BROADWELL-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [8:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_phsubd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_phsubd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
+; SKYLAKE-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_phsubd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_phsubd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
+; SKX-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_phsubd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKX-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_phsubd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_phsubd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_phsubd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_phsubd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -606,42 +978,84 @@ define <8 x i16> @test_phsubsw(<8 x i16>
 ; SLM-NEXT:    phsubsw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_phsubsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:1.50]
+; SANDY-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:1.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_phsubsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
 ; SANDY-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_phsubsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
+; HASWELL-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_phsubsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_phsubsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
+; BROADWELL-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [8:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_phsubsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_phsubsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
+; SKYLAKE-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_phsubsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_phsubsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
+; SKX-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_phsubsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKX-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_phsubsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_phsubsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_phsubsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_phsubsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -673,42 +1087,84 @@ define <8 x i16> @test_phsubw(<8 x i16>
 ; SLM-NEXT:    phsubw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_phsubw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:1.50]
+; SANDY-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:1.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_phsubw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
 ; SANDY-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_phsubw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
+; HASWELL-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_phsubw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_phsubw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
+; BROADWELL-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [8:2.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_phsubw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_phsubw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
+; SKYLAKE-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_phsubw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_phsubw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
+; SKX-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_phsubw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
 ; SKX-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_phsubw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_phsubw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_phsubw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_phsubw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -740,42 +1196,84 @@ define <8 x i16> @test_pmaddubsw(<16 x i
 ; SLM-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmaddubsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmaddubsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmaddubsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmaddubsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmaddubsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmaddubsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmaddubsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmaddubsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmaddubsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmaddubsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmaddubsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmaddubsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmaddubsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmaddubsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -808,42 +1306,84 @@ define <8 x i16> @test_pmulhrsw(<8 x i16
 ; SLM-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pmulhrsw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pmulhrsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pmulhrsw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pmulhrsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pmulhrsw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pmulhrsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pmulhrsw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pmulhrsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pmulhrsw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pmulhrsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pmulhrsw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pmulhrsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pmulhrsw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pmulhrsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -875,42 +1415,84 @@ define <16 x i8> @test_pshufb(<16 x i8>
 ; SLM-NEXT:    pshufb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_pshufb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_pshufb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_pshufb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_pshufb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_pshufb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_pshufb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_pshufb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_pshufb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_pshufb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_pshufb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; SKX-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_pshufb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [2:2.00]
+; BTVER2-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:2.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_pshufb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
 ; BTVER2-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_pshufb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_pshufb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -946,42 +1528,84 @@ define <16 x i8> @test_psignb(<16 x i8>
 ; SLM-NEXT:    psignb (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psignb:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psignb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psignb:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psignb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psignb:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psignb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psignb:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psignb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psignb:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psignb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psignb:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psignb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psignb:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psignb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1017,42 +1641,84 @@ define <4 x i32> @test_psignd(<4 x i32>
 ; SLM-NEXT:    psignd (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psignd:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psignd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psignd:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psignd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psignd:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psignd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psignd:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psignd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psignd:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psignd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psignd:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psignd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psignd:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psignd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1088,42 +1754,84 @@ define <8 x i16> @test_psignw(<8 x i16>
 ; SLM-NEXT:    psignw (%rdi), %xmm0 # sched: [4:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
+; SANDY-SSE-LABEL: test_psignw:
+; SANDY-SSE:       # %bb.0:
+; SANDY-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_psignw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
+; HASWELL-SSE-LABEL: test_psignw:
+; HASWELL-SSE:       # %bb.0:
+; HASWELL-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; HASWELL-LABEL: test_psignw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; BROADWELL-SSE-LABEL: test_psignw:
+; BROADWELL-SSE:       # %bb.0:
+; BROADWELL-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; BROADWELL-LABEL: test_psignw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
+; SKYLAKE-SSE-LABEL: test_psignw:
+; SKYLAKE-SSE:       # %bb.0:
+; SKYLAKE-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKYLAKE-LABEL: test_psignw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
+; SKX-SSE-LABEL: test_psignw:
+; SKX-SSE:       # %bb.0:
+; SKX-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT:    retq # sched: [7:1.00]
+;
 ; SKX-LABEL: test_psignw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
+; BTVER2-SSE-LABEL: test_psignw:
+; BTVER2-SSE:       # %bb.0:
+; BTVER2-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
+;
 ; BTVER2-LABEL: test_psignw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
+; ZNVER1-SSE-LABEL: test_psignw:
+; ZNVER1-SSE:       # %bb.0:
+; ZNVER1-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
+;
 ; ZNVER1-LABEL: test_psignw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]




More information about the llvm-commits mailing list