[llvm] r309691 - [X86] Added missing cpu to fix generic scheduling model tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 08:14:35 PDT 2017


Author: rksimon
Date: Tue Aug  1 08:14:35 2017
New Revision: 309691

URL: http://llvm.org/viewvc/llvm-project?rev=309691&view=rev
Log:
[X86] Added missing cpu to fix generic scheduling model tests

Modified:
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
    llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll
    llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
    llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll
    llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll
    llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
@@ -7,6 +8,12 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
 
 define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_addpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -37,6 +44,12 @@ define <4 x double> @test_addpd(<4 x dou
 }
 
 define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_addps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -67,6 +80,12 @@ define <8 x float> @test_addps(<8 x floa
 }
 
 define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_addsubpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addsubpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -98,6 +117,12 @@ define <4 x double> @test_addsubpd(<4 x
 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
 define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_addsubps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_addsubps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -129,6 +154,13 @@ define <8 x float> @test_addsubps(<8 x f
 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
 define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_andnotpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_andnotpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -170,6 +202,13 @@ define <4 x double> @test_andnotpd(<4 x
 }
 
 define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_andnotps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_andnotps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -211,6 +250,13 @@ define <8 x float> @test_andnotps(<8 x f
 }
 
 define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_andpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_andpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -250,6 +296,13 @@ define <4 x double> @test_andpd(<4 x dou
 }
 
 define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_andps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_andps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -289,6 +342,13 @@ define <8 x float> @test_andps(<8 x floa
 }
 
 define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_blendpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
+; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_blendpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
@@ -324,6 +384,12 @@ define <4 x double> @test_blendpd(<4 x d
 }
 
 define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_blendps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_blendps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
@@ -354,6 +420,12 @@ define <8 x float> @test_blendps(<8 x fl
 }
 
 define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
+; GENERIC-LABEL: test_blendvpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; GENERIC-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_blendvpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
@@ -385,6 +457,12 @@ define <4 x double> @test_blendvpd(<4 x
 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
 
 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
+; GENERIC-LABEL: test_blendvps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; GENERIC-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_blendvps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
@@ -416,6 +494,11 @@ define <8 x float> @test_blendvps(<8 x f
 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
 
 define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
+; GENERIC-LABEL: test_broadcastf128:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_broadcastf128:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00]
@@ -441,6 +524,11 @@ define <8 x float> @test_broadcastf128(<
 }
 
 define <4 x double> @test_broadcastsd_ymm(double *%a0) {
+; GENERIC-LABEL: test_broadcastsd_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_broadcastsd_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
@@ -467,6 +555,11 @@ define <4 x double> @test_broadcastsd_ym
 }
 
 define <4 x float> @test_broadcastss(float *%a0) {
+; GENERIC-LABEL: test_broadcastss:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_broadcastss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
@@ -493,6 +586,11 @@ define <4 x float> @test_broadcastss(flo
 }
 
 define <8 x float> @test_broadcastss_ymm(float *%a0) {
+; GENERIC-LABEL: test_broadcastss_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_broadcastss_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [5:1.00]
@@ -519,6 +617,13 @@ define <8 x float> @test_broadcastss_ymm
 }
 
 define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_cmppd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
+; GENERIC-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cmppd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
@@ -557,6 +662,13 @@ define <4 x double> @test_cmppd(<4 x dou
 }
 
 define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_cmpps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
+; GENERIC-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cmpps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
@@ -595,6 +707,13 @@ define <8 x float> @test_cmpps(<8 x floa
 }
 
 define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
+; GENERIC-LABEL: test_cvtdq2pd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtdq2pd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
@@ -630,6 +749,13 @@ define <4 x double> @test_cvtdq2pd(<4 x
 }
 
 define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
+; GENERIC-LABEL: test_cvtdq2ps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtdq2ps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
@@ -667,6 +793,13 @@ define <8 x float> @test_cvtdq2ps(<8 x i
 }
 
 define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_cvtpd2dq:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
+; GENERIC-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtpd2dq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
@@ -702,6 +835,13 @@ define <8 x i32> @test_cvtpd2dq(<4 x dou
 }
 
 define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_cvtpd2ps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
+; GENERIC-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtpd2ps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
@@ -737,6 +877,13 @@ define <8 x float> @test_cvtpd2ps(<4 x d
 }
 
 define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_cvtps2dq:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_cvtps2dq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
@@ -772,6 +919,12 @@ define <8 x i32> @test_cvtps2dq(<8 x flo
 }
 
 define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_divpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00]
+; GENERIC-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_divpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00]
@@ -802,6 +955,12 @@ define <4 x double> @test_divpd(<4 x dou
 }
 
 define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_divps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00]
+; GENERIC-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_divps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00]
@@ -832,6 +991,12 @@ define <8 x float> @test_divps(<8 x floa
 }
 
 define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_dpps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
+; GENERIC-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_dpps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
@@ -863,6 +1028,13 @@ define <8 x float> @test_dpps(<8 x float
 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
 
 define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_extractf128:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    vzeroupper
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_extractf128:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
@@ -896,6 +1068,12 @@ define <4 x float> @test_extractf128(<8
 }
 
 define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_haddpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_haddpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -927,6 +1105,12 @@ define <4 x double> @test_haddpd(<4 x do
 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
 define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_haddps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; GENERIC-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_haddps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
@@ -958,6 +1142,12 @@ define <8 x float> @test_haddps(<8 x flo
 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
 define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_hsubpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; GENERIC-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_hsubpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
@@ -989,6 +1179,12 @@ define <4 x double> @test_hsubpd(<4 x do
 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
 define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_hsubps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; GENERIC-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_hsubps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
@@ -1020,6 +1216,13 @@ define <8 x float> @test_hsubps(<8 x flo
 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
 define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_insertf128:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
+; GENERIC-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_insertf128:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
@@ -1057,6 +1260,11 @@ define <8 x float> @test_insertf128(<8 x
 }
 
 define <32 x i8> @test_lddqu(i8* %a0) {
+; GENERIC-LABEL: test_lddqu:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vlddqu (%rdi), %ymm0 # sched: [6:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_lddqu:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vlddqu (%rdi), %ymm0 # sched: [6:0.50]
@@ -1082,6 +1290,13 @@ define <32 x i8> @test_lddqu(i8* %a0) {
 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
 
 define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) {
+; GENERIC-LABEL: test_maskmovpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
+; GENERIC-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maskmovpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
@@ -1117,6 +1332,13 @@ declare <2 x double> @llvm.x86.avx.maskl
 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
 
 define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) {
+; GENERIC-LABEL: test_maskmovpd_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00]
+; GENERIC-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi)
+; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maskmovpd_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00]
@@ -1152,6 +1374,13 @@ declare <4 x double> @llvm.x86.avx.maskl
 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
 
 define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) {
+; GENERIC-LABEL: test_maskmovps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
+; GENERIC-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maskmovps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
@@ -1187,6 +1416,13 @@ declare <4 x float> @llvm.x86.avx.masklo
 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
 
 define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) {
+; GENERIC-LABEL: test_maskmovps_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
+; GENERIC-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi)
+; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maskmovps_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
@@ -1222,6 +1458,12 @@ declare <8 x float> @llvm.x86.avx.masklo
 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
 
 define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_maxpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maxpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -1253,6 +1495,12 @@ define <4 x double> @test_maxpd(<4 x dou
 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
 define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_maxps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_maxps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -1284,6 +1532,12 @@ define <8 x float> @test_maxps(<8 x floa
 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
 define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_minpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_minpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -1315,6 +1569,12 @@ define <4 x double> @test_minpd(<4 x dou
 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
 define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_minps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_minps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -1346,6 +1606,13 @@ define <8 x float> @test_minps(<8 x floa
 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
 define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_movapd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovapd (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vmovapd %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movapd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovapd (%rdi), %ymm0 # sched: [7:0.50]
@@ -1380,6 +1647,13 @@ define <4 x double> @test_movapd(<4 x do
 }
 
 define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_movaps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovaps (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vmovaps %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movaps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovaps (%rdi), %ymm0 # sched: [7:0.50]
@@ -1414,6 +1688,13 @@ define <8 x float> @test_movaps(<8 x flo
 }
 
 define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_movddup:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
+; GENERIC-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movddup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
@@ -1449,6 +1730,12 @@ define <4 x double> @test_movddup(<4 x d
 }
 
 define i32 @test_movmskpd(<4 x double> %a0) {
+; GENERIC-LABEL: test_movmskpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
+; GENERIC-NEXT:    vzeroupper
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movmskpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
@@ -1477,6 +1764,12 @@ define i32 @test_movmskpd(<4 x double> %
 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
 
 define i32 @test_movmskps(<8 x float> %a0) {
+; GENERIC-LABEL: test_movmskps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    vzeroupper
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movmskps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
@@ -1505,6 +1798,12 @@ define i32 @test_movmskps(<8 x float> %a
 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
 
 define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_movntpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movntpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
@@ -1534,6 +1833,12 @@ define <4 x double> @test_movntpd(<4 x d
 }
 
 define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_movntps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vmovntps %ymm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movntps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
@@ -1563,6 +1868,13 @@ define <8 x float> @test_movntps(<8 x fl
 }
 
 define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_movshdup:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
+; GENERIC-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movshdup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
@@ -1598,6 +1910,13 @@ define <8 x float> @test_movshdup(<8 x f
 }
 
 define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_movsldup:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
+; GENERIC-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movsldup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
@@ -1633,6 +1952,13 @@ define <8 x float> @test_movsldup(<8 x f
 }
 
 define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_movupd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovupd (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vmovupd %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movupd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -1669,6 +1995,13 @@ define <4 x double> @test_movupd(<4 x do
 }
 
 define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_movups:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmovups (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vmovups %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_movups:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
@@ -1705,6 +2038,12 @@ define <8 x float> @test_movups(<8 x flo
 }
 
 define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_mulpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mulpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
@@ -1735,6 +2074,12 @@ define <4 x double> @test_mulpd(<4 x dou
 }
 
 define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_mulps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_mulps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
@@ -1765,6 +2110,13 @@ define <8 x float> @test_mulps(<8 x floa
 }
 
 define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: orpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: orpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -1804,6 +2156,13 @@ define <4 x double> @orpd(<4 x double> %
 }
 
 define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_orps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_orps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -1843,6 +2202,13 @@ define <8 x float> @test_orps(<8 x float
 }
 
 define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) {
+; GENERIC-LABEL: test_permilpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
+; GENERIC-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
+; GENERIC-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_permilpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
@@ -1878,6 +2244,13 @@ define <2 x double> @test_permilpd(<2 x
 }
 
 define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_permilpd_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00]
+; GENERIC-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_permilpd_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00]
@@ -1913,6 +2286,13 @@ define <4 x double> @test_permilpd_ymm(<
 }
 
 define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) {
+; GENERIC-LABEL: test_permilps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
+; GENERIC-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
+; GENERIC-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_permilps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
@@ -1948,6 +2328,13 @@ define <4 x float> @test_permilps(<4 x f
 }
 
 define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_permilps_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00]
+; GENERIC-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00]
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_permilps_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00]
@@ -1983,6 +2370,12 @@ define <8 x float> @test_permilps_ymm(<8
 }
 
 define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
+; GENERIC-LABEL: test_permilvarpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_permilvarpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -2014,6 +2407,12 @@ define <2 x double> @test_permilvarpd(<2
 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
 
 define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
+; GENERIC-LABEL: test_permilvarpd_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_permilvarpd_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -2045,6 +2444,12 @@ define <4 x double> @test_permilvarpd_ym
 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
 
 define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+; GENERIC-LABEL: test_permilvarps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_permilvarps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -2076,6 +2481,12 @@ define <4 x float> @test_permilvarps(<4
 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
 
 define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
+; GENERIC-LABEL: test_permilvarps_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_permilvarps_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -2107,6 +2518,13 @@ define <8 x float> @test_permilvarps_ymm
 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
 
 define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_rcpps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vrcpps (%rdi), %ymm1 # sched: [9:1.00]
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_rcpps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
@@ -2143,6 +2561,13 @@ define <8 x float> @test_rcpps(<8 x floa
 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
 
 define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_roundpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_roundpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
@@ -2179,6 +2604,13 @@ define <4 x double> @test_roundpd(<4 x d
 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
 
 define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_roundps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_roundps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
@@ -2215,6 +2647,13 @@ define <8 x float> @test_roundps(<8 x fl
 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
 
 define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_rsqrtps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:3.00]
+; GENERIC-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [7:3.00]
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_rsqrtps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:3.00]
@@ -2251,6 +2690,13 @@ define <8 x float> @test_rsqrtps(<8 x fl
 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
 
 define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_shufpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
+; GENERIC-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_shufpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
@@ -2286,6 +2732,12 @@ define <4 x double> @test_shufpd(<4 x do
 }
 
 define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
+; GENERIC-LABEL: test_shufps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
+; GENERIC-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_shufps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
@@ -2316,6 +2768,13 @@ define <8 x float> @test_shufps(<8 x flo
 }
 
 define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_sqrtpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:3.00]
+; GENERIC-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [45:3.00]
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_sqrtpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:3.00]
@@ -2352,6 +2811,13 @@ define <4 x double> @test_sqrtpd(<4 x do
 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
 
 define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_sqrtps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:3.00]
+; GENERIC-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [29:3.00]
+; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_sqrtps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:3.00]
@@ -2388,6 +2854,12 @@ define <8 x float> @test_sqrtps(<8 x flo
 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
 
 define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_subpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_subpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -2418,6 +2890,12 @@ define <4 x double> @test_subpd(<4 x dou
 }
 
 define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_subps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_subps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
@@ -2448,6 +2926,15 @@ define <8 x float> @test_subps(<8 x floa
 }
 
 define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
+; GENERIC-LABEL: test_testpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    vtestpd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_testpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
@@ -2492,6 +2979,16 @@ define i32 @test_testpd(<2 x double> %a0
 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
 
 define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_testpd_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vzeroupper
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_testpd_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
@@ -2539,6 +3036,15 @@ define i32 @test_testpd_ymm(<4 x double>
 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
 
 define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
+; GENERIC-LABEL: test_testps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    vtestps (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_testps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
@@ -2583,6 +3089,16 @@ define i32 @test_testps(<4 x float> %a0,
 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
 
 define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_testps_ymm:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vzeroupper
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_testps_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
@@ -2630,6 +3146,13 @@ define i32 @test_testps_ymm(<8 x float>
 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
 
 define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_unpckhpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00]
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_unpckhpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
@@ -2665,6 +3188,12 @@ define <4 x double> @test_unpckhpd(<4 x
 }
 
 define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
+; GENERIC-LABEL: test_unpckhps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_unpckhps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
@@ -2695,6 +3224,13 @@ define <8 x float> @test_unpckhps(<8 x f
 }
 
 define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_unpcklpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
+; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_unpcklpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
@@ -2730,6 +3266,12 @@ define <4 x double> @test_unpcklpd(<4 x
 }
 
 define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
+; GENERIC-LABEL: test_unpcklps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_unpcklps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
@@ -2760,6 +3302,13 @@ define <8 x float> @test_unpcklps(<8 x f
 }
 
 define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
+; GENERIC-LABEL: test_xorpd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_xorpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -2799,6 +3348,13 @@ define <4 x double> @test_xorpd(<4 x dou
 }
 
 define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
+; GENERIC-LABEL: test_xorps:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_xorps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -2838,6 +3394,11 @@ define <8 x float> @test_xorps(<8 x floa
 }
 
 define void @test_zeroall() {
+; GENERIC-LABEL: test_zeroall:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vzeroall
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_zeroall:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vzeroall
@@ -2863,6 +3424,11 @@ define void @test_zeroall() {
 declare void @llvm.x86.avx.vzeroall() nounwind
 
 define void @test_zeroupper() {
+; GENERIC-LABEL: test_zeroupper:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vzeroupper
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; SANDY-LABEL: test_zeroupper:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,9 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
 
 define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
+; GENERIC-LABEL: test_pabsb:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpabsb %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpabsb (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_pabsb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpabsb %ymm0, %ymm0 # sched: [1:0.50]
@@ -26,6 +34,13 @@ define <32 x i8> @test_pabsb(<32 x i8> %
 declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
 
 define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
+; GENERIC-LABEL: test_pabsd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpabsd %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpabsd (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_pabsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpabsd %ymm0, %ymm0 # sched: [1:0.50]
@@ -48,6 +63,13 @@ define <8 x i32> @test_pabsd(<8 x i32> %
 declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
 
 define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
+; GENERIC-LABEL: test_pabsw:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpabsw %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpabsw (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_pabsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpabsw %ymm0, %ymm0 # sched: [1:0.50]
@@ -70,6 +92,12 @@ define <16 x i16> @test_pabsw(<16 x i16>
 declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
 
 define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
+; GENERIC-LABEL: test_paddb:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_paddb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
@@ -88,6 +116,12 @@ define <32 x i8> @test_paddb(<32 x i8> %
 }
 
 define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
+; GENERIC-LABEL: test_paddd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_paddd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
@@ -106,6 +140,12 @@ define <8 x i32> @test_paddd(<8 x i32> %
 }
 
 define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
+; GENERIC-LABEL: test_paddq:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_paddq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
@@ -124,6 +164,12 @@ define <4 x i64> @test_paddq(<4 x i64> %
 }
 
 define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
+; GENERIC-LABEL: test_paddw:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_paddw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
@@ -142,6 +188,13 @@ define <16 x i16> @test_paddw(<16 x i16>
 }
 
 define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
+; GENERIC-LABEL: test_pand:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_pand:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
@@ -163,6 +216,13 @@ define <4 x i64> @test_pand(<4 x i64> %a
 }
 
 define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
+; GENERIC-LABEL: test_pandn:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_pandn:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
@@ -186,6 +246,12 @@ define <4 x i64> @test_pandn(<4 x i64> %
 }
 
 define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
+; GENERIC-LABEL: test_pmulld:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_pmulld:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
@@ -204,6 +270,12 @@ define <8 x i32> @test_pmulld(<8 x i32>
 }
 
 define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
+; GENERIC-LABEL: test_pmullw:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_pmullw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
@@ -222,6 +294,13 @@ define <16 x i16> @test_pmullw(<16 x i16
 }
 
 define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
+; GENERIC-LABEL: test_por:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_por:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
@@ -243,6 +322,12 @@ define <4 x i64> @test_por(<4 x i64> %a0
 }
 
 define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
+; GENERIC-LABEL: test_psubb:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_psubb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
@@ -261,6 +346,12 @@ define <32 x i8> @test_psubb(<32 x i8> %
 }
 
 define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
+; GENERIC-LABEL: test_psubd:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_psubd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
@@ -279,6 +370,12 @@ define <8 x i32> @test_psubd(<8 x i32> %
 }
 
 define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
+; GENERIC-LABEL: test_psubq:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_psubq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
@@ -297,6 +394,12 @@ define <4 x i64> @test_psubq(<4 x i64> %
 }
 
 define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
+; GENERIC-LABEL: test_psubw:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_psubw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
@@ -315,6 +418,13 @@ define <16 x i16> @test_psubw(<16 x i16>
 }
 
 define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
+; GENERIC-LABEL: test_pxor:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; HASWELL-LABEL: test_pxor:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]

Modified: llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+bmi   | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl     | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
@@ -9,12 +9,12 @@
 define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) {
 ; GENERIC-LABEL: test_andn_i16:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    andnl %esi, %edi, %eax
-; GENERIC-NEXT:    notl %edi
-; GENERIC-NEXT:    andw (%rdx), %di
-; GENERIC-NEXT:    addl %edi, %eax
+; GENERIC-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
+; GENERIC-NEXT:    andw (%rdx), %di # sched: [5:0.50]
+; GENERIC-NEXT:    addl %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andn_i16:
 ; HASWELL:       # BB#0:
@@ -53,10 +53,10 @@ define i16 @test_andn_i16(i16 zeroext %a
 define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_andn_i32:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    andnl %esi, %edi, %ecx
-; GENERIC-NEXT:    andnl (%rdx), %edi, %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    andnl (%rdx), %edi, %eax # sched: [5:0.50]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andn_i32:
 ; HASWELL:       # BB#0:
@@ -89,10 +89,10 @@ define i32 @test_andn_i32(i32 %a0, i32 %
 define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_andn_i64:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    andnq %rsi, %rdi, %rcx
-; GENERIC-NEXT:    andnq (%rdx), %rdi, %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.33]
+; GENERIC-NEXT:    andnq (%rdx), %rdi, %rax # sched: [5:0.50]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andn_i64:
 ; HASWELL:       # BB#0:
@@ -127,8 +127,8 @@ define i32 @test_bextr_i32(i32 %a0, i32
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    bextrl %edi, (%rdx), %ecx
 ; GENERIC-NEXT:    bextrl %edi, %esi, %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_bextr_i32:
 ; HASWELL:       # BB#0:
@@ -163,8 +163,8 @@ define i64 @test_bextr_i64(i64 %a0, i64
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    bextrq %rdi, (%rdx), %rcx
 ; GENERIC-NEXT:    bextrq %rdi, %rsi, %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_bextr_i64:
 ; HASWELL:       # BB#0:
@@ -199,8 +199,8 @@ define i32 @test_blsi_i32(i32 %a0, i32 *
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    blsil (%rsi), %ecx
 ; GENERIC-NEXT:    blsil %edi, %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blsi_i32:
 ; HASWELL:       # BB#0:
@@ -236,8 +236,8 @@ define i64 @test_blsi_i64(i64 %a0, i64 *
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    blsiq (%rsi), %rcx
 ; GENERIC-NEXT:    blsiq %rdi, %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blsi_i64:
 ; HASWELL:       # BB#0:
@@ -273,8 +273,8 @@ define i32 @test_blsmsk_i32(i32 %a0, i32
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    blsmskl (%rsi), %ecx
 ; GENERIC-NEXT:    blsmskl %edi, %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blsmsk_i32:
 ; HASWELL:       # BB#0:
@@ -310,8 +310,8 @@ define i64 @test_blsmsk_i64(i64 %a0, i64
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    blsmskq (%rsi), %rcx
 ; GENERIC-NEXT:    blsmskq %rdi, %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blsmsk_i64:
 ; HASWELL:       # BB#0:
@@ -347,8 +347,8 @@ define i32 @test_blsr_i32(i32 %a0, i32 *
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    blsrl (%rsi), %ecx
 ; GENERIC-NEXT:    blsrl %edi, %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blsr_i32:
 ; HASWELL:       # BB#0:
@@ -384,8 +384,8 @@ define i64 @test_blsr_i64(i64 %a0, i64 *
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    blsrq (%rsi), %rcx
 ; GENERIC-NEXT:    blsrq %rdi, %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blsr_i64:
 ; HASWELL:       # BB#0:
@@ -421,9 +421,9 @@ define i16 @test_cttz_i16(i16 zeroext %a
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    tzcntw (%rsi), %cx
 ; GENERIC-NEXT:    tzcntw %di, %ax
-; GENERIC-NEXT:    orl %ecx, %eax
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cttz_i16:
 ; HASWELL:       # BB#0:
@@ -461,8 +461,8 @@ define i32 @test_cttz_i32(i32 %a0, i32 *
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    tzcntl (%rsi), %ecx
 ; GENERIC-NEXT:    tzcntl %edi, %eax
-; GENERIC-NEXT:    orl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cttz_i32:
 ; HASWELL:       # BB#0:
@@ -497,8 +497,8 @@ define i64 @test_cttz_i64(i64 %a0, i64 *
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    tzcntq (%rsi), %rcx
 ; GENERIC-NEXT:    tzcntq %rdi, %rax
-; GENERIC-NEXT:    orq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cttz_i64:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+bmi2  | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl     | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
@@ -10,8 +10,8 @@ define i32 @test_bzhi_i32(i32 %a0, i32 %
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    bzhil %edi, (%rdx), %ecx
 ; GENERIC-NEXT:    bzhil %edi, %esi, %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_bzhi_i32:
 ; HASWELL:       # BB#0:
@@ -39,8 +39,8 @@ define i64 @test_bzhi_i64(i64 %a0, i64 %
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    bzhiq %rdi, (%rdx), %rcx
 ; GENERIC-NEXT:    bzhiq %rdi, %rsi, %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_bzhi_i64:
 ; HASWELL:       # BB#0:
@@ -68,8 +68,8 @@ define i32 @test_pdep_i32(i32 %a0, i32 %
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    pdepl (%rdx), %edi, %ecx
 ; GENERIC-NEXT:    pdepl %esi, %edi, %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pdep_i32:
 ; HASWELL:       # BB#0:
@@ -97,8 +97,8 @@ define i64 @test_pdep_i64(i64 %a0, i64 %
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    pdepq (%rdx), %rdi, %rcx
 ; GENERIC-NEXT:    pdepq %rsi, %rdi, %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pdep_i64:
 ; HASWELL:       # BB#0:
@@ -126,8 +126,8 @@ define i32 @test_pext_i32(i32 %a0, i32 %
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    pextl (%rdx), %edi, %ecx
 ; GENERIC-NEXT:    pextl %esi, %edi, %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pext_i32:
 ; HASWELL:       # BB#0:
@@ -155,8 +155,8 @@ define i64 @test_pext_i64(i64 %a0, i64 %
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    pextq (%rdx), %rdi, %rcx
 ; GENERIC-NEXT:    pextq %rsi, %rdi, %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pext_i64:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
@@ -6,6 +7,13 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
 
 define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
+; GENERIC-LABEL: test_vcvtph2ps_128:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; IVY-LABEL: test_vcvtph2ps_128:
 ; IVY:       # BB#0:
 ; IVY-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
@@ -42,6 +50,13 @@ define <4 x float> @test_vcvtph2ps_128(<
 declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
 
 define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
+; GENERIC-LABEL: test_vcvtph2ps_256:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; IVY-LABEL: test_vcvtph2ps_256:
 ; IVY:       # BB#0:
 ; IVY-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
@@ -78,6 +93,12 @@ define <8 x float> @test_vcvtph2ps_256(<
 declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
 
 define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) {
+; GENERIC-LABEL: test_vcvtps2ph_128:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; IVY-LABEL: test_vcvtps2ph_128:
 ; IVY:       # BB#0:
 ; IVY-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
@@ -110,6 +131,13 @@ define <8 x i16> @test_vcvtps2ph_128(<4
 declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32)
 
 define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) {
+; GENERIC-LABEL: test_vcvtps2ph_256:
+; GENERIC:       # BB#0:
+; GENERIC-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
+; GENERIC-NEXT:    vzeroupper
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
 ; IVY-LABEL: test_vcvtps2ph_256:
 ; IVY:       # BB#0:
 ; IVY-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]

Modified: llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl     | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
@@ -11,9 +11,9 @@ define i16 @test_ctlz_i16(i16 zeroext %a
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    lzcntw (%rsi), %cx
 ; GENERIC-NEXT:    lzcntw %di, %ax
-; GENERIC-NEXT:    orl %ecx, %eax
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_ctlz_i16:
 ; HASWELL:       # BB#0:
@@ -51,8 +51,8 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    lzcntl (%rsi), %ecx
 ; GENERIC-NEXT:    lzcntl %edi, %eax
-; GENERIC-NEXT:    orl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_ctlz_i32:
 ; HASWELL:       # BB#0:
@@ -87,8 +87,8 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    lzcntq (%rsi), %rcx
 ; GENERIC-NEXT:    lzcntq %rdi, %rax
-; GENERIC-NEXT:    orq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_ctlz_i64:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+popcnt    | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm         | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont    | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
@@ -13,11 +13,11 @@
 define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
 ; GENERIC-LABEL: test_ctpop_i16:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    popcntw (%rsi), %cx
-; GENERIC-NEXT:    popcntw %di, %ax
-; GENERIC-NEXT:    orl %ecx, %eax
+; GENERIC-NEXT:    popcntw (%rsi), %cx # sched: [7:1.00]
+; GENERIC-NEXT:    popcntw %di, %ax # sched: [3:1.00]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_ctpop_i16:
 ; SLM:       # BB#0:
@@ -69,10 +69,10 @@ declare i16 @llvm.ctpop.i16(i16)
 define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_ctpop_i32:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    popcntl (%rsi), %ecx
-; GENERIC-NEXT:    popcntl %edi, %eax
-; GENERIC-NEXT:    orl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    popcntl (%rsi), %ecx # sched: [7:1.00]
+; GENERIC-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_ctpop_i32:
 ; SLM:       # BB#0:
@@ -119,10 +119,10 @@ declare i32 @llvm.ctpop.i32(i32)
 define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
 ; GENERIC-LABEL: test_ctpop_i64:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    popcntq (%rsi), %rcx
-; GENERIC-NEXT:    popcntq %rdi, %rax
-; GENERIC-NEXT:    orq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    popcntq (%rsi), %rcx # sched: [9:1.00]
+; GENERIC-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
+; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_ctpop_i64:
 ; SLM:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
@@ -12,9 +12,9 @@
 define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_addps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    addps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_addps:
 ; ATOM:       # BB#0:
@@ -60,9 +60,9 @@ define <4 x float> @test_addps(<4 x floa
 define float @test_addss(float %a0, float %a1, float *%a2) {
 ; GENERIC-LABEL: test_addss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    addss %xmm1, %xmm0
-; GENERIC-NEXT:    addss (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    addss (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_addss:
 ; ATOM:       # BB#0:
@@ -108,9 +108,9 @@ define float @test_addss(float %a0, floa
 define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_andps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    andps %xmm1, %xmm0
-; GENERIC-NEXT:    andps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_andps:
 ; ATOM:       # BB#0:
@@ -168,9 +168,9 @@ define <4 x float> @test_andps(<4 x floa
 define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_andnotps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    andnps %xmm1, %xmm0
-; GENERIC-NEXT:    andnps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_andnotps:
 ; ATOM:       # BB#0:
@@ -230,10 +230,10 @@ define <4 x float> @test_andnotps(<4 x f
 define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_cmpps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cmpeqps %xmm0, %xmm1
-; GENERIC-NEXT:    cmpeqps (%rdi), %xmm0
-; GENERIC-NEXT:    orps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cmpps:
 ; ATOM:       # BB#0:
@@ -288,9 +288,9 @@ define <4 x float> @test_cmpps(<4 x floa
 define float @test_cmpss(float %a0, float %a1, float *%a2) {
 ; GENERIC-LABEL: test_cmpss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cmpeqss %xmm1, %xmm0
-; GENERIC-NEXT:    cmpeqss (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cmpss:
 ; ATOM:       # BB#0:
@@ -341,17 +341,17 @@ declare <4 x float> @llvm.x86.sse.cmp.ss
 define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_comiss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    comiss %xmm1, %xmm0
-; GENERIC-NEXT:    setnp %al
-; GENERIC-NEXT:    sete %cl
-; GENERIC-NEXT:    andb %al, %cl
-; GENERIC-NEXT:    comiss (%rdi), %xmm0
-; GENERIC-NEXT:    setnp %al
-; GENERIC-NEXT:    sete %dl
-; GENERIC-NEXT:    andb %al, %dl
-; GENERIC-NEXT:    orb %cl, %dl
-; GENERIC-NEXT:    movzbl %dl, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
+; GENERIC-NEXT:    sete %cl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
+; GENERIC-NEXT:    comiss (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
+; GENERIC-NEXT:    sete %dl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
+; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
+; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_comiss:
 ; ATOM:       # BB#0:
@@ -447,10 +447,10 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x
 define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_cvtsi2ss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtsi2ssl %edi, %xmm1
-; GENERIC-NEXT:    cvtsi2ssl (%rsi), %xmm0
-; GENERIC-NEXT:    addss %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
+; GENERIC-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtsi2ss:
 ; ATOM:       # BB#0:
@@ -503,10 +503,10 @@ define float @test_cvtsi2ss(i32 %a0, i32
 define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
 ; GENERIC-LABEL: test_cvtsi2ssq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtsi2ssq %rdi, %xmm1
-; GENERIC-NEXT:    cvtsi2ssq (%rsi), %xmm0
-; GENERIC-NEXT:    addss %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; GENERIC-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtsi2ssq:
 ; ATOM:       # BB#0:
@@ -559,10 +559,10 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 define i32 @test_cvtss2si(float %a0, float *%a1) {
 ; GENERIC-LABEL: test_cvtss2si:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtss2si %xmm0, %ecx
-; GENERIC-NEXT:    cvtss2si (%rdi), %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtss2si %xmm0, %ecx # sched: [5:1.00]
+; GENERIC-NEXT:    cvtss2si (%rdi), %eax # sched: [9:1.00]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtss2si:
 ; ATOM:       # BB#0:
@@ -618,10 +618,10 @@ declare i32 @llvm.x86.sse.cvtss2si(<4 x
 define i64 @test_cvtss2siq(float %a0, float *%a1) {
 ; GENERIC-LABEL: test_cvtss2siq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtss2si %xmm0, %rcx
-; GENERIC-NEXT:    cvtss2si (%rdi), %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtss2si %xmm0, %rcx # sched: [5:1.00]
+; GENERIC-NEXT:    cvtss2si (%rdi), %rax # sched: [9:1.00]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtss2siq:
 ; ATOM:       # BB#0:
@@ -677,10 +677,10 @@ declare i64 @llvm.x86.sse.cvtss2si64(<4
 define i32 @test_cvttss2si(float %a0, float *%a1) {
 ; GENERIC-LABEL: test_cvttss2si:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvttss2si %xmm0, %ecx
-; GENERIC-NEXT:    cvttss2si (%rdi), %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvttss2si %xmm0, %ecx # sched: [5:1.00]
+; GENERIC-NEXT:    cvttss2si (%rdi), %eax # sched: [9:1.00]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttss2si:
 ; ATOM:       # BB#0:
@@ -733,10 +733,10 @@ define i32 @test_cvttss2si(float %a0, fl
 define i64 @test_cvttss2siq(float %a0, float *%a1) {
 ; GENERIC-LABEL: test_cvttss2siq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvttss2si %xmm0, %rcx
-; GENERIC-NEXT:    cvttss2si (%rdi), %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvttss2si %xmm0, %rcx # sched: [5:1.00]
+; GENERIC-NEXT:    cvttss2si (%rdi), %rax # sched: [9:1.00]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttss2siq:
 ; ATOM:       # BB#0:
@@ -789,9 +789,9 @@ define i64 @test_cvttss2siq(float %a0, f
 define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_divps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    divps %xmm1, %xmm0
-; GENERIC-NEXT:    divps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    divps %xmm1, %xmm0 # sched: [14:1.00]
+; GENERIC-NEXT:    divps (%rdi), %xmm0 # sched: [20:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_divps:
 ; ATOM:       # BB#0:
@@ -837,9 +837,9 @@ define <4 x float> @test_divps(<4 x floa
 define float @test_divss(float %a0, float %a1, float *%a2) {
 ; GENERIC-LABEL: test_divss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    divss %xmm1, %xmm0
-; GENERIC-NEXT:    divss (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    divss %xmm1, %xmm0 # sched: [14:1.00]
+; GENERIC-NEXT:    divss (%rdi), %xmm0 # sched: [20:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_divss:
 ; ATOM:       # BB#0:
@@ -885,9 +885,9 @@ define float @test_divss(float %a0, floa
 define void @test_ldmxcsr(i32 %a0) {
 ; GENERIC-LABEL: test_ldmxcsr:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
-; GENERIC-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; GENERIC-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_ldmxcsr:
 ; ATOM:       # BB#0:
@@ -935,9 +935,9 @@ declare void @llvm.x86.sse.ldmxcsr(i8*)
 define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_maxps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    maxps %xmm1, %xmm0
-; GENERIC-NEXT:    maxps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    maxps (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_maxps:
 ; ATOM:       # BB#0:
@@ -984,9 +984,9 @@ declare <4 x float> @llvm.x86.sse.max.ps
 define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_maxss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    maxss %xmm1, %xmm0
-; GENERIC-NEXT:    maxss (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    maxss (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_maxss:
 ; ATOM:       # BB#0:
@@ -1033,9 +1033,9 @@ declare <4 x float> @llvm.x86.sse.max.ss
 define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_minps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    minps %xmm1, %xmm0
-; GENERIC-NEXT:    minps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    minps (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_minps:
 ; ATOM:       # BB#0:
@@ -1082,9 +1082,9 @@ declare <4 x float> @llvm.x86.sse.min.ps
 define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_minss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    minss %xmm1, %xmm0
-; GENERIC-NEXT:    minss (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    minss (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_minss:
 ; ATOM:       # BB#0:
@@ -1131,10 +1131,10 @@ declare <4 x float> @llvm.x86.sse.min.ss
 define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_movaps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movaps (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm0, %xmm0
-; GENERIC-NEXT:    movaps %xmm0, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    movaps %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movaps:
 ; ATOM:       # BB#0:
@@ -1188,8 +1188,8 @@ define void @test_movaps(<4 x float> *%a
 define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
 ; GENERIC-LABEL: test_movhlps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movhlps:
 ; ATOM:       # BB#0:
@@ -1235,11 +1235,11 @@ define <4 x float> @test_movhlps(<4 x fl
 define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
 ; GENERIC-LABEL: test_movhps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; GENERIC-NEXT:    addps %xmm0, %xmm1
-; GENERIC-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; GENERIC-NEXT:    movlps %xmm1, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; GENERIC-NEXT:    movlps %xmm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movhps:
 ; ATOM:       # BB#0:
@@ -1298,9 +1298,9 @@ define void @test_movhps(<4 x float> %a0
 define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
 ; GENERIC-LABEL: test_movlhps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movlhps:
 ; ATOM:       # BB#0:
@@ -1345,10 +1345,10 @@ define <4 x float> @test_movlhps(<4 x fl
 define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
 ; GENERIC-LABEL: test_movlps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
-; GENERIC-NEXT:    addps %xmm0, %xmm1
-; GENERIC-NEXT:    movlps %xmm1, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; GENERIC-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    movlps %xmm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movlps:
 ; ATOM:       # BB#0:
@@ -1404,8 +1404,8 @@ define void @test_movlps(<4 x float> %a0
 define i32 @test_movmskps(<4 x float> %a0) {
 ; GENERIC-LABEL: test_movmskps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movmskps %xmm0, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movmskps %xmm0, %eax # sched: [2:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movmskps:
 ; ATOM:       # BB#0:
@@ -1446,8 +1446,8 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x
 define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_movntps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movntps %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movntps %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movntps:
 ; ATOM:       # BB#0:
@@ -1491,10 +1491,10 @@ define void @test_movntps(<4 x float> %a
 define void @test_movss_mem(float* %a0, float* %a1) {
 ; GENERIC-LABEL: test_movss_mem:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; GENERIC-NEXT:    addss %xmm0, %xmm0
-; GENERIC-NEXT:    movss %xmm0, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; GENERIC-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    movss %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movss_mem:
 ; ATOM:       # BB#0:
@@ -1546,8 +1546,8 @@ define void @test_movss_mem(float* %a0,
 define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
 ; GENERIC-LABEL: test_movss_reg:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movss_reg:
 ; ATOM:       # BB#0:
@@ -1591,10 +1591,10 @@ define <4 x float> @test_movss_reg(<4 x
 define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_movups:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movups (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm0, %xmm0
-; GENERIC-NEXT:    movups %xmm0, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    movups %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movups:
 ; ATOM:       # BB#0:
@@ -1646,9 +1646,9 @@ define void @test_movups(<4 x float> *%a
 define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_mulps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    mulps %xmm1, %xmm0
-; GENERIC-NEXT:    mulps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    mulps %xmm1, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    mulps (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_mulps:
 ; ATOM:       # BB#0:
@@ -1694,9 +1694,9 @@ define <4 x float> @test_mulps(<4 x floa
 define float @test_mulss(float %a0, float %a1, float *%a2) {
 ; GENERIC-LABEL: test_mulss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    mulss %xmm1, %xmm0
-; GENERIC-NEXT:    mulss (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    mulss %xmm1, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    mulss (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_mulss:
 ; ATOM:       # BB#0:
@@ -1742,9 +1742,9 @@ define float @test_mulss(float %a0, floa
 define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_orps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    orps %xmm1, %xmm0
-; GENERIC-NEXT:    orps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    orps (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_orps:
 ; ATOM:       # BB#0:
@@ -1802,8 +1802,8 @@ define <4 x float> @test_orps(<4 x float
 define void @test_prefetchnta(i8* %a0) {
 ; GENERIC-LABEL: test_prefetchnta:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    prefetchnta (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_prefetchnta:
 ; ATOM:       # BB#0:
@@ -1848,10 +1848,10 @@ declare void @llvm.prefetch(i8* nocaptur
 define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_rcpps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    rcpps %xmm0, %xmm1
-; GENERIC-NEXT:    rcpps (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; GENERIC-NEXT:    rcpps (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_rcpps:
 ; ATOM:       # BB#0:
@@ -1909,11 +1909,11 @@ declare <4 x float> @llvm.x86.sse.rcp.ps
 define <4 x float> @test_rcpss(float %a0, float *%a1) {
 ; GENERIC-LABEL: test_rcpss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    rcpss %xmm0, %xmm0
-; GENERIC-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; GENERIC-NEXT:    rcpss %xmm1, %xmm1
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; GENERIC-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_rcpss:
 ; ATOM:       # BB#0:
@@ -1975,10 +1975,10 @@ declare <4 x float> @llvm.x86.sse.rcp.ss
 define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_rsqrtps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    rsqrtps %xmm0, %xmm1
-; GENERIC-NEXT:    rsqrtps (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; GENERIC-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_rsqrtps:
 ; ATOM:       # BB#0:
@@ -2036,11 +2036,11 @@ declare <4 x float> @llvm.x86.sse.rsqrt.
 define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
 ; GENERIC-LABEL: test_rsqrtss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    rsqrtss %xmm0, %xmm0
-; GENERIC-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; GENERIC-NEXT:    rsqrtss %xmm1, %xmm1
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; GENERIC-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_rsqrtss:
 ; ATOM:       # BB#0:
@@ -2102,8 +2102,8 @@ declare <4 x float> @llvm.x86.sse.rsqrt.
 define void @test_sfence() {
 ; GENERIC-LABEL: test_sfence:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    sfence
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    sfence # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_sfence:
 ; ATOM:       # BB#0:
@@ -2148,9 +2148,9 @@ declare void @llvm.x86.sse.sfence() noun
 define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
 ; GENERIC-LABEL: test_shufps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; GENERIC-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; GENERIC-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_shufps:
 ; ATOM:       # BB#0:
@@ -2200,10 +2200,10 @@ define <4 x float> @test_shufps(<4 x flo
 define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_sqrtps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    sqrtps %xmm0, %xmm1
-; GENERIC-NEXT:    sqrtps (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    sqrtps %xmm0, %xmm1 # sched: [14:1.00]
+; GENERIC-NEXT:    sqrtps (%rdi), %xmm0 # sched: [20:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_sqrtps:
 ; ATOM:       # BB#0:
@@ -2260,11 +2260,11 @@ declare <4 x float> @llvm.x86.sse.sqrt.p
 define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_sqrtss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    sqrtss %xmm0, %xmm0
-; GENERIC-NEXT:    movaps (%rdi), %xmm1
-; GENERIC-NEXT:    sqrtss %xmm1, %xmm1
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    sqrtss %xmm0, %xmm0 # sched: [14:1.00]
+; GENERIC-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
+; GENERIC-NEXT:    sqrtss %xmm1, %xmm1 # sched: [14:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_sqrtss:
 ; ATOM:       # BB#0:
@@ -2324,9 +2324,9 @@ declare <4 x float> @llvm.x86.sse.sqrt.s
 define i32 @test_stmxcsr() {
 ; GENERIC-LABEL: test_stmxcsr:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    stmxcsr -{{[0-9]+}}(%rsp)
-; GENERIC-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; GENERIC-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_stmxcsr:
 ; ATOM:       # BB#0:
@@ -2374,9 +2374,9 @@ declare void @llvm.x86.sse.stmxcsr(i8*)
 define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_subps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    subps %xmm1, %xmm0
-; GENERIC-NEXT:    subps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    subps (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_subps:
 ; ATOM:       # BB#0:
@@ -2422,9 +2422,9 @@ define <4 x float> @test_subps(<4 x floa
 define float @test_subss(float %a0, float %a1, float *%a2) {
 ; GENERIC-LABEL: test_subss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    subss %xmm1, %xmm0
-; GENERIC-NEXT:    subss (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    subss (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_subss:
 ; ATOM:       # BB#0:
@@ -2470,17 +2470,17 @@ define float @test_subss(float %a0, floa
 define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_ucomiss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    ucomiss %xmm1, %xmm0
-; GENERIC-NEXT:    setnp %al
-; GENERIC-NEXT:    sete %cl
-; GENERIC-NEXT:    andb %al, %cl
-; GENERIC-NEXT:    ucomiss (%rdi), %xmm0
-; GENERIC-NEXT:    setnp %al
-; GENERIC-NEXT:    sete %dl
-; GENERIC-NEXT:    andb %al, %dl
-; GENERIC-NEXT:    orb %cl, %dl
-; GENERIC-NEXT:    movzbl %dl, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
+; GENERIC-NEXT:    sete %cl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
+; GENERIC-NEXT:    ucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
+; GENERIC-NEXT:    sete %dl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
+; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
+; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_ucomiss:
 ; ATOM:       # BB#0:
@@ -2576,9 +2576,9 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4
 define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_unpckhps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; GENERIC-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; GENERIC-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_unpckhps:
 ; ATOM:       # BB#0:
@@ -2628,9 +2628,9 @@ define <4 x float> @test_unpckhps(<4 x f
 define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_unpcklps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; GENERIC-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; GENERIC-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_unpcklps:
 ; ATOM:       # BB#0:
@@ -2680,9 +2680,9 @@ define <4 x float> @test_unpcklps(<4 x f
 define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_xorps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    xorps %xmm1, %xmm0
-; GENERIC-NEXT:    xorps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_xorps:
 ; ATOM:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
@@ -12,9 +12,9 @@
 define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_addpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    addpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_addpd:
 ; ATOM:       # BB#0:
@@ -60,9 +60,9 @@ define <2 x double> @test_addpd(<2 x dou
 define double @test_addsd(double %a0, double %a1, double *%a2) {
 ; GENERIC-LABEL: test_addsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    addsd %xmm1, %xmm0
-; GENERIC-NEXT:    addsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    addsd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_addsd:
 ; ATOM:       # BB#0:
@@ -108,10 +108,10 @@ define double @test_addsd(double %a0, do
 define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_andpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    andpd %xmm1, %xmm0
-; GENERIC-NEXT:    andpd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_andpd:
 ; ATOM:       # BB#0:
@@ -168,10 +168,10 @@ define <2 x double> @test_andpd(<2 x dou
 define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_andnotpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    andnpd %xmm1, %xmm0
-; GENERIC-NEXT:    andnpd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_andnotpd:
 ; ATOM:       # BB#0:
@@ -230,10 +230,10 @@ define <2 x double> @test_andnotpd(<2 x
 define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_cmppd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cmpeqpd %xmm0, %xmm1
-; GENERIC-NEXT:    cmpeqpd (%rdi), %xmm0
-; GENERIC-NEXT:    orpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cmppd:
 ; ATOM:       # BB#0:
@@ -288,9 +288,9 @@ define <2 x double> @test_cmppd(<2 x dou
 define double @test_cmpsd(double %a0, double %a1, double *%a2) {
 ; GENERIC-LABEL: test_cmpsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cmpeqsd %xmm1, %xmm0
-; GENERIC-NEXT:    cmpeqsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cmpsd:
 ; ATOM:       # BB#0:
@@ -341,17 +341,17 @@ declare <2 x double> @llvm.x86.sse2.cmp.
 define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_comisd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    comisd %xmm1, %xmm0
-; GENERIC-NEXT:    setnp %al
-; GENERIC-NEXT:    sete %cl
-; GENERIC-NEXT:    andb %al, %cl
-; GENERIC-NEXT:    comisd (%rdi), %xmm0
-; GENERIC-NEXT:    setnp %al
-; GENERIC-NEXT:    sete %dl
-; GENERIC-NEXT:    andb %al, %dl
-; GENERIC-NEXT:    orb %cl, %dl
-; GENERIC-NEXT:    movzbl %dl, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
+; GENERIC-NEXT:    sete %cl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
+; GENERIC-NEXT:    comisd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
+; GENERIC-NEXT:    sete %dl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
+; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
+; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_comisd:
 ; ATOM:       # BB#0:
@@ -447,10 +447,10 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2
 define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
 ; GENERIC-LABEL: test_cvtdq2pd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtdq2pd %xmm0, %xmm1
-; GENERIC-NEXT:    cvtdq2pd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; GENERIC-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtdq2pd:
 ; ATOM:       # BB#0:
@@ -505,10 +505,10 @@ define <2 x double> @test_cvtdq2pd(<4 x
 define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
 ; GENERIC-LABEL: test_cvtdq2ps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtdq2ps %xmm0, %xmm1
-; GENERIC-NEXT:    cvtdq2ps (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtdq2ps:
 ; ATOM:       # BB#0:
@@ -562,10 +562,10 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_cvtpd2dq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtpd2dq %xmm0, %xmm1
-; GENERIC-NEXT:    cvtpd2dq (%rdi), %xmm0
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; GENERIC-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtpd2dq:
 ; ATOM:       # BB#0:
@@ -620,10 +620,10 @@ declare <4 x i32> @llvm.x86.sse2.cvtpd2d
 define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_cvtpd2ps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtpd2ps %xmm0, %xmm1
-; GENERIC-NEXT:    cvtpd2ps (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; GENERIC-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtpd2ps:
 ; ATOM:       # BB#0:
@@ -678,10 +678,10 @@ declare <4 x float> @llvm.x86.sse2.cvtpd
 define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_cvtps2dq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtps2dq %xmm0, %xmm1
-; GENERIC-NEXT:    cvtps2dq (%rdi), %xmm0
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtps2dq:
 ; ATOM:       # BB#0:
@@ -736,10 +736,10 @@ declare <4 x i32> @llvm.x86.sse2.cvtps2d
 define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_cvtps2pd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtps2pd %xmm0, %xmm1
-; GENERIC-NEXT:    cvtps2pd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; GENERIC-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtps2pd:
 ; ATOM:       # BB#0:
@@ -795,10 +795,10 @@ define <2 x double> @test_cvtps2pd(<4 x
 define i32 @test_cvtsd2si(double %a0, double *%a1) {
 ; GENERIC-LABEL: test_cvtsd2si:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtsd2si %xmm0, %ecx
-; GENERIC-NEXT:    cvtsd2si (%rdi), %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; GENERIC-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtsd2si:
 ; ATOM:       # BB#0:
@@ -854,10 +854,10 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x
 define i64 @test_cvtsd2siq(double %a0, double *%a1) {
 ; GENERIC-LABEL: test_cvtsd2siq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtsd2si %xmm0, %rcx
-; GENERIC-NEXT:    cvtsd2si (%rdi), %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; GENERIC-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtsd2siq:
 ; ATOM:       # BB#0:
@@ -913,11 +913,11 @@ declare i64 @llvm.x86.sse2.cvtsd2si64(<2
 define float @test_cvtsd2ss(double %a0, double *%a1) {
 ; GENERIC-LABEL: test_cvtsd2ss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtsd2ss %xmm0, %xmm1
-; GENERIC-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; GENERIC-NEXT:    cvtsd2ss %xmm0, %xmm0
-; GENERIC-NEXT:    addss %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; GENERIC-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50]
+; GENERIC-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtsd2ss:
 ; ATOM:       # BB#0:
@@ -977,10 +977,10 @@ define float @test_cvtsd2ss(double %a0,
 define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_cvtsi2sd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtsi2sdl %edi, %xmm1
-; GENERIC-NEXT:    cvtsi2sdl (%rsi), %xmm0
-; GENERIC-NEXT:    addsd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; GENERIC-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtsi2sd:
 ; ATOM:       # BB#0:
@@ -1033,10 +1033,10 @@ define double @test_cvtsi2sd(i32 %a0, i3
 define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
 ; GENERIC-LABEL: test_cvtsi2sdq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtsi2sdq %rdi, %xmm1
-; GENERIC-NEXT:    cvtsi2sdq (%rsi), %xmm0
-; GENERIC-NEXT:    addsd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; GENERIC-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtsi2sdq:
 ; ATOM:       # BB#0:
@@ -1091,11 +1091,11 @@ define double @test_cvtsi2sdq(i64 %a0, i
 define double @test_cvtss2sd(float %a0, float *%a1) {
 ; GENERIC-LABEL: test_cvtss2sd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvtss2sd %xmm0, %xmm1
-; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; GENERIC-NEXT:    cvtss2sd %xmm0, %xmm0
-; GENERIC-NEXT:    addsd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
+; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; GENERIC-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtss2sd:
 ; ATOM:       # BB#0:
@@ -1155,10 +1155,10 @@ define double @test_cvtss2sd(float %a0,
 define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_cvttpd2dq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvttpd2dq %xmm0, %xmm1
-; GENERIC-NEXT:    cvttpd2dq (%rdi), %xmm0
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; GENERIC-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttpd2dq:
 ; ATOM:       # BB#0:
@@ -1214,10 +1214,10 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_cvttps2dq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvttps2dq %xmm0, %xmm1
-; GENERIC-NEXT:    cvttps2dq (%rdi), %xmm0
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttps2dq:
 ; ATOM:       # BB#0:
@@ -1271,10 +1271,10 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 define i32 @test_cvttsd2si(double %a0, double *%a1) {
 ; GENERIC-LABEL: test_cvttsd2si:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvttsd2si %xmm0, %ecx
-; GENERIC-NEXT:    cvttsd2si (%rdi), %eax
-; GENERIC-NEXT:    addl %ecx, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; GENERIC-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttsd2si:
 ; ATOM:       # BB#0:
@@ -1327,10 +1327,10 @@ define i32 @test_cvttsd2si(double %a0, d
 define i64 @test_cvttsd2siq(double %a0, double *%a1) {
 ; GENERIC-LABEL: test_cvttsd2siq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    cvttsd2si %xmm0, %rcx
-; GENERIC-NEXT:    cvttsd2si (%rdi), %rax
-; GENERIC-NEXT:    addq %rcx, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; GENERIC-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttsd2siq:
 ; ATOM:       # BB#0:
@@ -1383,9 +1383,9 @@ define i64 @test_cvttsd2siq(double %a0,
 define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_divpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    divpd %xmm1, %xmm0
-; GENERIC-NEXT:    divpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    divpd %xmm1, %xmm0 # sched: [22:1.00]
+; GENERIC-NEXT:    divpd (%rdi), %xmm0 # sched: [28:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_divpd:
 ; ATOM:       # BB#0:
@@ -1431,9 +1431,9 @@ define <2 x double> @test_divpd(<2 x dou
 define double @test_divsd(double %a0, double %a1, double *%a2) {
 ; GENERIC-LABEL: test_divsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    divsd %xmm1, %xmm0
-; GENERIC-NEXT:    divsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    divsd %xmm1, %xmm0 # sched: [22:1.00]
+; GENERIC-NEXT:    divsd (%rdi), %xmm0 # sched: [28:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_divsd:
 ; ATOM:       # BB#0:
@@ -1479,8 +1479,8 @@ define double @test_divsd(double %a0, do
 define void @test_lfence() {
 ; GENERIC-LABEL: test_lfence:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    lfence
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    lfence # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lfence:
 ; ATOM:       # BB#0:
@@ -1525,8 +1525,8 @@ declare void @llvm.x86.sse2.lfence() nou
 define void @test_mfence() {
 ; GENERIC-LABEL: test_mfence:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    mfence
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    mfence # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_mfence:
 ; ATOM:       # BB#0:
@@ -1571,8 +1571,8 @@ declare void @llvm.x86.sse2.mfence() nou
 define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
 ; GENERIC-LABEL: test_maskmovdqu:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    maskmovdqu %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_maskmovdqu:
 ; ATOM:       # BB#0:
@@ -1615,9 +1615,9 @@ declare void @llvm.x86.sse2.maskmov.dqu(
 define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_maxpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    maxpd %xmm1, %xmm0
-; GENERIC-NEXT:    maxpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_maxpd:
 ; ATOM:       # BB#0:
@@ -1664,9 +1664,9 @@ declare <2 x double> @llvm.x86.sse2.max.
 define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_maxsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    maxsd %xmm1, %xmm0
-; GENERIC-NEXT:    maxsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_maxsd:
 ; ATOM:       # BB#0:
@@ -1713,9 +1713,9 @@ declare <2 x double> @llvm.x86.sse2.max.
 define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_minpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    minpd %xmm1, %xmm0
-; GENERIC-NEXT:    minpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_minpd:
 ; ATOM:       # BB#0:
@@ -1762,9 +1762,9 @@ declare <2 x double> @llvm.x86.sse2.min.
 define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_minsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    minsd %xmm1, %xmm0
-; GENERIC-NEXT:    minsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    minsd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_minsd:
 ; ATOM:       # BB#0:
@@ -1811,10 +1811,10 @@ declare <2 x double> @llvm.x86.sse2.min.
 define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_movapd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movapd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm0, %xmm0
-; GENERIC-NEXT:    movapd %xmm0, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    movapd %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movapd:
 ; ATOM:       # BB#0:
@@ -1866,10 +1866,10 @@ define void @test_movapd(<2 x double> *%
 define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
 ; GENERIC-LABEL: test_movdqa:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movdqa (%rdi), %xmm0
-; GENERIC-NEXT:    paddq %xmm0, %xmm0
-; GENERIC-NEXT:    movdqa %xmm0, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    movdqa %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movdqa:
 ; ATOM:       # BB#0:
@@ -1921,10 +1921,10 @@ define void @test_movdqa(<2 x i64> *%a0,
 define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
 ; GENERIC-LABEL: test_movdqu:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movdqu (%rdi), %xmm0
-; GENERIC-NEXT:    paddq %xmm0, %xmm0
-; GENERIC-NEXT:    movdqu %xmm0, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    movdqu %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movdqu:
 ; ATOM:       # BB#0:
@@ -1976,13 +1976,13 @@ define void @test_movdqu(<2 x i64> *%a0,
 define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_movd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movd %edi, %xmm1
-; GENERIC-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; GENERIC-NEXT:    paddd %xmm0, %xmm1
-; GENERIC-NEXT:    paddd %xmm0, %xmm2
-; GENERIC-NEXT:    movd %xmm2, %eax
-; GENERIC-NEXT:    movd %xmm1, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
+; GENERIC-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; GENERIC-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
+; GENERIC-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
+; GENERIC-NEXT:    movd %xmm1, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movd:
 ; ATOM:       # BB#0:
@@ -2057,13 +2057,13 @@ define i32 @test_movd(<4 x i32> %a0, i32
 define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_movd_64:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movq %rdi, %xmm1
-; GENERIC-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
-; GENERIC-NEXT:    paddq %xmm0, %xmm1
-; GENERIC-NEXT:    paddq %xmm0, %xmm2
-; GENERIC-NEXT:    movq %xmm2, %rax
-; GENERIC-NEXT:    movq %xmm1, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
+; GENERIC-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50]
+; GENERIC-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
+; GENERIC-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
+; GENERIC-NEXT:    movq %xmm1, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movd_64:
 ; ATOM:       # BB#0:
@@ -2138,10 +2138,10 @@ define i64 @test_movd_64(<2 x i64> %a0,
 define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
 ; GENERIC-LABEL: test_movhpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; GENERIC-NEXT:    addpd %xmm0, %xmm1
-; GENERIC-NEXT:    movhpd %xmm1, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    movhpd %xmm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movhpd:
 ; ATOM:       # BB#0:
@@ -2196,10 +2196,10 @@ define void @test_movhpd(<2 x double> %a
 define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
 ; GENERIC-LABEL: test_movlpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
-; GENERIC-NEXT:    addpd %xmm0, %xmm1
-; GENERIC-NEXT:    movlpd %xmm1, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    movlpd %xmm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movlpd:
 ; ATOM:       # BB#0:
@@ -2254,8 +2254,8 @@ define void @test_movlpd(<2 x double> %a
 define i32 @test_movmskpd(<2 x double> %a0) {
 ; GENERIC-LABEL: test_movmskpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movmskpd %xmm0, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movmskpd:
 ; ATOM:       # BB#0:
@@ -2296,9 +2296,9 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2
 define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) {
 ; GENERIC-LABEL: test_movntdqa:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddq %xmm0, %xmm0
-; GENERIC-NEXT:    movntdq %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    movntdq %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movntdqa:
 ; ATOM:       # BB#0:
@@ -2345,9 +2345,9 @@ define void @test_movntdqa(<2 x i64> %a0
 define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_movntpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    addpd %xmm0, %xmm0
-; GENERIC-NEXT:    movntpd %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    movntpd %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movntpd:
 ; ATOM:       # BB#0:
@@ -2392,10 +2392,10 @@ define void @test_movntpd(<2 x double> %
 define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
 ; GENERIC-LABEL: test_movq_mem:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    movq %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    movq %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movq_mem:
 ; ATOM:       # BB#0:
@@ -2449,9 +2449,9 @@ define <2 x i64> @test_movq_mem(<2 x i64
 define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) {
 ; GENERIC-LABEL: test_movq_reg:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:1.00]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movq_reg:
 ; ATOM:       # BB#0:
@@ -2498,10 +2498,10 @@ define <2 x i64> @test_movq_reg(<2 x i64
 define void @test_movsd_mem(double* %a0, double* %a1) {
 ; GENERIC-LABEL: test_movsd_mem:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; GENERIC-NEXT:    addsd %xmm0, %xmm0
-; GENERIC-NEXT:    movsd %xmm0, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50]
+; GENERIC-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movsd_mem:
 ; ATOM:       # BB#0:
@@ -2553,9 +2553,9 @@ define void @test_movsd_mem(double* %a0,
 define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
 ; GENERIC-LABEL: test_movsd_reg:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; GENERIC-NEXT:    movapd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; GENERIC-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movsd_reg:
 ; ATOM:       # BB#0:
@@ -2599,10 +2599,10 @@ define <2 x double> @test_movsd_reg(<2 x
 define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_movupd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movupd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm0, %xmm0
-; GENERIC-NEXT:    movupd %xmm0, (%rsi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    movupd %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movupd:
 ; ATOM:       # BB#0:
@@ -2654,9 +2654,9 @@ define void @test_movupd(<2 x double> *%
 define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_mulpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    mulpd %xmm1, %xmm0
-; GENERIC-NEXT:    mulpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_mulpd:
 ; ATOM:       # BB#0:
@@ -2702,9 +2702,9 @@ define <2 x double> @test_mulpd(<2 x dou
 define double @test_mulsd(double %a0, double %a1, double *%a2) {
 ; GENERIC-LABEL: test_mulsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    mulsd %xmm1, %xmm0
-; GENERIC-NEXT:    mulsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    mulsd (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_mulsd:
 ; ATOM:       # BB#0:
@@ -2750,10 +2750,10 @@ define double @test_mulsd(double %a0, do
 define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_orpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    orpd %xmm1, %xmm0
-; GENERIC-NEXT:    orpd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_orpd:
 ; ATOM:       # BB#0:
@@ -2810,9 +2810,9 @@ define <2 x double> @test_orpd(<2 x doub
 define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_packssdw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    packssdw %xmm1, %xmm0
-; GENERIC-NEXT:    packssdw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packssdw:
 ; ATOM:       # BB#0:
@@ -2868,9 +2868,9 @@ declare <8 x i16> @llvm.x86.sse2.packssd
 define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_packsswb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    packsswb %xmm1, %xmm0
-; GENERIC-NEXT:    packsswb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packsswb:
 ; ATOM:       # BB#0:
@@ -2926,9 +2926,9 @@ declare <16 x i8> @llvm.x86.sse2.packssw
 define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_packuswb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    packuswb %xmm1, %xmm0
-; GENERIC-NEXT:    packuswb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packuswb:
 ; ATOM:       # BB#0:
@@ -2984,9 +2984,9 @@ declare <16 x i8> @llvm.x86.sse2.packusw
 define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_paddb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddb %xmm1, %xmm0
-; GENERIC-NEXT:    paddb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddb:
 ; ATOM:       # BB#0:
@@ -3036,9 +3036,9 @@ define <16 x i8> @test_paddb(<16 x i8> %
 define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_paddd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    paddd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddd:
 ; ATOM:       # BB#0:
@@ -3088,9 +3088,9 @@ define <4 x i32> @test_paddd(<4 x i32> %
 define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_paddq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    paddq (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddq:
 ; ATOM:       # BB#0:
@@ -3136,9 +3136,9 @@ define <2 x i64> @test_paddq(<2 x i64> %
 define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_paddsb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddsb %xmm1, %xmm0
-; GENERIC-NEXT:    paddsb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddsb:
 ; ATOM:       # BB#0:
@@ -3189,9 +3189,9 @@ declare <16 x i8> @llvm.x86.sse2.padds.b
 define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_paddsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddsw %xmm1, %xmm0
-; GENERIC-NEXT:    paddsw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddsw:
 ; ATOM:       # BB#0:
@@ -3242,9 +3242,9 @@ declare <8 x i16> @llvm.x86.sse2.padds.w
 define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_paddusb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddusb %xmm1, %xmm0
-; GENERIC-NEXT:    paddusb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddusb:
 ; ATOM:       # BB#0:
@@ -3295,9 +3295,9 @@ declare <16 x i8> @llvm.x86.sse2.paddus.
 define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_paddusw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddusw %xmm1, %xmm0
-; GENERIC-NEXT:    paddusw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddusw:
 ; ATOM:       # BB#0:
@@ -3348,9 +3348,9 @@ declare <8 x i16> @llvm.x86.sse2.paddus.
 define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_paddw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    paddw %xmm1, %xmm0
-; GENERIC-NEXT:    paddw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddw:
 ; ATOM:       # BB#0:
@@ -3400,10 +3400,10 @@ define <8 x i16> @test_paddw(<8 x i16> %
 define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_pand:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pand %xmm1, %xmm0
-; GENERIC-NEXT:    pand (%rdi), %xmm0
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pand:
 ; ATOM:       # BB#0:
@@ -3456,12 +3456,12 @@ define <2 x i64> @test_pand(<2 x i64> %a
 define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_pandn:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pandn %xmm1, %xmm0
-; GENERIC-NEXT:    movdqa %xmm0, %xmm1
-; GENERIC-NEXT:    pandn (%rdi), %xmm1
-; GENERIC-NEXT:    paddq %xmm0, %xmm1
-; GENERIC-NEXT:    movdqa %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; GENERIC-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pandn:
 ; ATOM:       # BB#0:
@@ -3520,9 +3520,9 @@ define <2 x i64> @test_pandn(<2 x i64> %
 define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pavgb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pavgb %xmm1, %xmm0
-; GENERIC-NEXT:    pavgb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pavgb:
 ; ATOM:       # BB#0:
@@ -3573,9 +3573,9 @@ declare <16 x i8> @llvm.x86.sse2.pavg.b(
 define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pavgw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pavgw %xmm1, %xmm0
-; GENERIC-NEXT:    pavgw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pavgw:
 ; ATOM:       # BB#0:
@@ -3626,10 +3626,10 @@ declare <8 x i16> @llvm.x86.sse2.pavg.w(
 define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pcmpeqb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pcmpeqb %xmm0, %xmm1
-; GENERIC-NEXT:    pcmpeqb (%rdi), %xmm0
-; GENERIC-NEXT:    por %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqb:
 ; ATOM:       # BB#0:
@@ -3685,10 +3685,10 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pcmpeqd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pcmpeqd %xmm0, %xmm1
-; GENERIC-NEXT:    pcmpeqd (%rdi), %xmm0
-; GENERIC-NEXT:    por %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqd:
 ; ATOM:       # BB#0:
@@ -3744,10 +3744,10 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pcmpeqw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pcmpeqw %xmm0, %xmm1
-; GENERIC-NEXT:    pcmpeqw (%rdi), %xmm0
-; GENERIC-NEXT:    por %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqw:
 ; ATOM:       # BB#0:
@@ -3803,11 +3803,11 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pcmpgtb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movdqa %xmm0, %xmm2
-; GENERIC-NEXT:    pcmpgtb %xmm1, %xmm2
-; GENERIC-NEXT:    pcmpgtb (%rdi), %xmm0
-; GENERIC-NEXT:    por %xmm2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; GENERIC-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; GENERIC-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtb:
 ; ATOM:       # BB#0:
@@ -3863,11 +3863,11 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pcmpgtd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movdqa %xmm0, %xmm2
-; GENERIC-NEXT:    pcmpgtd %xmm1, %xmm2
-; GENERIC-NEXT:    pcmpeqd (%rdi), %xmm0
-; GENERIC-NEXT:    por %xmm2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; GENERIC-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; GENERIC-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtd:
 ; ATOM:       # BB#0:
@@ -3923,11 +3923,11 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pcmpgtw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movdqa %xmm0, %xmm2
-; GENERIC-NEXT:    pcmpgtw %xmm1, %xmm2
-; GENERIC-NEXT:    pcmpgtw (%rdi), %xmm0
-; GENERIC-NEXT:    por %xmm2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; GENERIC-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; GENERIC-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtw:
 ; ATOM:       # BB#0:
@@ -3983,9 +3983,9 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 define i16 @test_pextrw(<8 x i16> %a0) {
 ; GENERIC-LABEL: test_pextrw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pextrw $6, %xmm0, %eax
+; GENERIC-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
 ; GENERIC-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pextrw:
 ; ATOM:       # BB#0:
@@ -4029,9 +4029,9 @@ define i16 @test_pextrw(<8 x i16> %a0) {
 define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) {
 ; GENERIC-LABEL: test_pinsrw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pinsrw $1, %edi, %xmm0
-; GENERIC-NEXT:    pinsrw $3, (%rsi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pinsrw:
 ; ATOM:       # BB#0:
@@ -4081,9 +4081,9 @@ define <8 x i16> @test_pinsrw(<8 x i16>
 define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pmaddwd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmaddwd %xmm1, %xmm0
-; GENERIC-NEXT:    pmaddwd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaddwd:
 ; ATOM:       # BB#0:
@@ -4139,9 +4139,9 @@ declare <4 x i32> @llvm.x86.sse2.pmadd.w
 define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pmaxsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmaxsw %xmm1, %xmm0
-; GENERIC-NEXT:    pmaxsw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaxsw:
 ; ATOM:       # BB#0:
@@ -4192,9 +4192,9 @@ declare <8 x i16> @llvm.x86.sse2.pmaxs.w
 define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pmaxub:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmaxub %xmm1, %xmm0
-; GENERIC-NEXT:    pmaxub (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaxub:
 ; ATOM:       # BB#0:
@@ -4245,9 +4245,9 @@ declare <16 x i8> @llvm.x86.sse2.pmaxu.b
 define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pminsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pminsw %xmm1, %xmm0
-; GENERIC-NEXT:    pminsw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pminsw:
 ; ATOM:       # BB#0:
@@ -4298,9 +4298,9 @@ declare <8 x i16> @llvm.x86.sse2.pmins.w
 define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pminub:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pminub %xmm1, %xmm0
-; GENERIC-NEXT:    pminub (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pminub:
 ; ATOM:       # BB#0:
@@ -4351,8 +4351,8 @@ declare <16 x i8> @llvm.x86.sse2.pminu.b
 define i32 @test_pmovmskb(<16 x i8> %a0) {
 ; GENERIC-LABEL: test_pmovmskb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovmskb %xmm0, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmovmskb:
 ; ATOM:       # BB#0:
@@ -4393,9 +4393,9 @@ declare i32 @llvm.x86.sse2.pmovmskb.128(
 define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pmulhuw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmulhuw %xmm1, %xmm0
-; GENERIC-NEXT:    pmulhuw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhuw:
 ; ATOM:       # BB#0:
@@ -4442,9 +4442,9 @@ declare <8 x i16> @llvm.x86.sse2.pmulhu.
 define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pmulhw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmulhw %xmm1, %xmm0
-; GENERIC-NEXT:    pmulhw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmulhw %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    pmulhw (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhw:
 ; ATOM:       # BB#0:
@@ -4491,9 +4491,9 @@ declare <8 x i16> @llvm.x86.sse2.pmulh.w
 define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pmullw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmullw %xmm1, %xmm0
-; GENERIC-NEXT:    pmullw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmullw %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    pmullw (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmullw:
 ; ATOM:       # BB#0:
@@ -4539,9 +4539,9 @@ define <8 x i16> @test_pmullw(<8 x i16>
 define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pmuludq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmuludq %xmm1, %xmm0
-; GENERIC-NEXT:    pmuludq (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmuludq %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    pmuludq (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmuludq:
 ; ATOM:       # BB#0:
@@ -4597,10 +4597,10 @@ declare <2 x i64> @llvm.x86.sse2.pmulu.d
 define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_por:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    por %xmm1, %xmm0
-; GENERIC-NEXT:    por (%rdi), %xmm0
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_por:
 ; ATOM:       # BB#0:
@@ -4653,9 +4653,9 @@ define <2 x i64> @test_por(<2 x i64> %a0
 define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_psadbw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psadbw %xmm1, %xmm0
-; GENERIC-NEXT:    psadbw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    psadbw (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psadbw:
 ; ATOM:       # BB#0:
@@ -4711,10 +4711,10 @@ declare <2 x i64> @llvm.x86.sse2.psad.bw
 define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
 ; GENERIC-LABEL: test_pshufd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; GENERIC-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; GENERIC-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pshufd:
 ; ATOM:       # BB#0:
@@ -4769,10 +4769,10 @@ define <4 x i32> @test_pshufd(<4 x i32>
 define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
 ; GENERIC-LABEL: test_pshufhw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6]
-; GENERIC-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4]
-; GENERIC-NEXT:    paddw %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; GENERIC-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pshufhw:
 ; ATOM:       # BB#0:
@@ -4827,10 +4827,10 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
 ; GENERIC-LABEL: test_pshuflw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7]
-; GENERIC-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7]
-; GENERIC-NEXT:    paddw %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; GENERIC-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pshuflw:
 ; ATOM:       # BB#0:
@@ -4885,10 +4885,10 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pslld:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pslld %xmm1, %xmm0
-; GENERIC-NEXT:    pslld (%rdi), %xmm0
-; GENERIC-NEXT:    pslld $2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pslld:
 ; ATOM:       # BB#0:
@@ -4943,8 +4943,8 @@ declare <4 x i32> @llvm.x86.sse2.pslli.d
 define <4 x i32> @test_pslldq(<4 x i32> %a0) {
 ; GENERIC-LABEL: test_pslldq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pslldq:
 ; ATOM:       # BB#0:
@@ -4988,10 +4988,10 @@ define <4 x i32> @test_pslldq(<4 x i32>
 define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_psllq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psllq %xmm1, %xmm0
-; GENERIC-NEXT:    psllq (%rdi), %xmm0
-; GENERIC-NEXT:    psllq $2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psllq:
 ; ATOM:       # BB#0:
@@ -5046,10 +5046,10 @@ declare <2 x i64> @llvm.x86.sse2.pslli.q
 define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_psllw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psllw %xmm1, %xmm0
-; GENERIC-NEXT:    psllw (%rdi), %xmm0
-; GENERIC-NEXT:    psllw $2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psllw:
 ; ATOM:       # BB#0:
@@ -5104,10 +5104,10 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w
 define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_psrad:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psrad %xmm1, %xmm0
-; GENERIC-NEXT:    psrad (%rdi), %xmm0
-; GENERIC-NEXT:    psrad $2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrad:
 ; ATOM:       # BB#0:
@@ -5162,10 +5162,10 @@ declare <4 x i32> @llvm.x86.sse2.psrai.d
 define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_psraw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psraw %xmm1, %xmm0
-; GENERIC-NEXT:    psraw (%rdi), %xmm0
-; GENERIC-NEXT:    psraw $2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psraw:
 ; ATOM:       # BB#0:
@@ -5220,10 +5220,10 @@ declare <8 x i16> @llvm.x86.sse2.psrai.w
 define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_psrld:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psrld %xmm1, %xmm0
-; GENERIC-NEXT:    psrld (%rdi), %xmm0
-; GENERIC-NEXT:    psrld $2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrld:
 ; ATOM:       # BB#0:
@@ -5278,8 +5278,8 @@ declare <4 x i32> @llvm.x86.sse2.psrli.d
 define <4 x i32> @test_psrldq(<4 x i32> %a0) {
 ; GENERIC-LABEL: test_psrldq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrldq:
 ; ATOM:       # BB#0:
@@ -5323,10 +5323,10 @@ define <4 x i32> @test_psrldq(<4 x i32>
 define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_psrlq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psrlq %xmm1, %xmm0
-; GENERIC-NEXT:    psrlq (%rdi), %xmm0
-; GENERIC-NEXT:    psrlq $2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrlq:
 ; ATOM:       # BB#0:
@@ -5381,10 +5381,10 @@ declare <2 x i64> @llvm.x86.sse2.psrli.q
 define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_psrlw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psrlw %xmm1, %xmm0
-; GENERIC-NEXT:    psrlw (%rdi), %xmm0
-; GENERIC-NEXT:    psrlw $2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrlw:
 ; ATOM:       # BB#0:
@@ -5439,9 +5439,9 @@ declare <8 x i16> @llvm.x86.sse2.psrli.w
 define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_psubb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psubb %xmm1, %xmm0
-; GENERIC-NEXT:    psubb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubb:
 ; ATOM:       # BB#0:
@@ -5491,9 +5491,9 @@ define <16 x i8> @test_psubb(<16 x i8> %
 define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_psubd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psubd %xmm1, %xmm0
-; GENERIC-NEXT:    psubd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubd:
 ; ATOM:       # BB#0:
@@ -5543,9 +5543,9 @@ define <4 x i32> @test_psubd(<4 x i32> %
 define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_psubq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psubq %xmm1, %xmm0
-; GENERIC-NEXT:    psubq (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubq:
 ; ATOM:       # BB#0:
@@ -5591,9 +5591,9 @@ define <2 x i64> @test_psubq(<2 x i64> %
 define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_psubsb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psubsb %xmm1, %xmm0
-; GENERIC-NEXT:    psubsb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubsb:
 ; ATOM:       # BB#0:
@@ -5644,9 +5644,9 @@ declare <16 x i8> @llvm.x86.sse2.psubs.b
 define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_psubsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psubsw %xmm1, %xmm0
-; GENERIC-NEXT:    psubsw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubsw:
 ; ATOM:       # BB#0:
@@ -5697,9 +5697,9 @@ declare <8 x i16> @llvm.x86.sse2.psubs.w
 define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_psubusb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psubusb %xmm1, %xmm0
-; GENERIC-NEXT:    psubusb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubusb:
 ; ATOM:       # BB#0:
@@ -5750,9 +5750,9 @@ declare <16 x i8> @llvm.x86.sse2.psubus.
 define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_psubusw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psubusw %xmm1, %xmm0
-; GENERIC-NEXT:    psubusw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubusw:
 ; ATOM:       # BB#0:
@@ -5803,9 +5803,9 @@ declare <8 x i16> @llvm.x86.sse2.psubus.
 define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_psubw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psubw %xmm1, %xmm0
-; GENERIC-NEXT:    psubw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubw:
 ; ATOM:       # BB#0:
@@ -5855,9 +5855,9 @@ define <8 x i16> @test_psubw(<8 x i16> %
 define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_punpckhbw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; GENERIC-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; GENERIC-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhbw:
 ; ATOM:       # BB#0:
@@ -5907,10 +5907,10 @@ define <16 x i8> @test_punpckhbw(<16 x i
 define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_punpckhdq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; GENERIC-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; GENERIC-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhdq:
 ; ATOM:       # BB#0:
@@ -5965,10 +5965,10 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_punpckhqdq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; GENERIC-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; GENERIC-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhqdq:
 ; ATOM:       # BB#0:
@@ -6021,9 +6021,9 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_punpckhwd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; GENERIC-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; GENERIC-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhwd:
 ; ATOM:       # BB#0:
@@ -6073,9 +6073,9 @@ define <8 x i16> @test_punpckhwd(<8 x i1
 define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_punpcklbw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; GENERIC-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; GENERIC-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpcklbw:
 ; ATOM:       # BB#0:
@@ -6125,10 +6125,10 @@ define <16 x i8> @test_punpcklbw(<16 x i
 define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_punpckldq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; GENERIC-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; GENERIC-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckldq:
 ; ATOM:       # BB#0:
@@ -6183,10 +6183,10 @@ define <4 x i32> @test_punpckldq(<4 x i3
 define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_punpcklqdq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0]
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpcklqdq:
 ; ATOM:       # BB#0:
@@ -6239,9 +6239,9 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_punpcklwd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; GENERIC-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; GENERIC-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpcklwd:
 ; ATOM:       # BB#0:
@@ -6291,10 +6291,10 @@ define <8 x i16> @test_punpcklwd(<8 x i1
 define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_pxor:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pxor %xmm1, %xmm0
-; GENERIC-NEXT:    pxor (%rdi), %xmm0
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pxor:
 ; ATOM:       # BB#0:
@@ -6347,10 +6347,10 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_shufpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
-; GENERIC-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; GENERIC-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_shufpd:
 ; ATOM:       # BB#0:
@@ -6403,10 +6403,10 @@ define <2 x double> @test_shufpd(<2 x do
 define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_sqrtpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    sqrtpd %xmm0, %xmm1
-; GENERIC-NEXT:    sqrtpd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [22:1.00]
+; GENERIC-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [28:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_sqrtpd:
 ; ATOM:       # BB#0:
@@ -6463,11 +6463,11 @@ declare <2 x double> @llvm.x86.sse2.sqrt
 define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_sqrtsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    sqrtsd %xmm0, %xmm0
-; GENERIC-NEXT:    movapd (%rdi), %xmm1
-; GENERIC-NEXT:    sqrtsd %xmm1, %xmm1
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [22:1.00]
+; GENERIC-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
+; GENERIC-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [22:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_sqrtsd:
 ; ATOM:       # BB#0:
@@ -6527,9 +6527,9 @@ declare <2 x double> @llvm.x86.sse2.sqrt
 define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_subpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    subpd %xmm1, %xmm0
-; GENERIC-NEXT:    subpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_subpd:
 ; ATOM:       # BB#0:
@@ -6575,9 +6575,9 @@ define <2 x double> @test_subpd(<2 x dou
 define double @test_subsd(double %a0, double %a1, double *%a2) {
 ; GENERIC-LABEL: test_subsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    subsd %xmm1, %xmm0
-; GENERIC-NEXT:    subsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    subsd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_subsd:
 ; ATOM:       # BB#0:
@@ -6623,17 +6623,17 @@ define double @test_subsd(double %a0, do
 define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_ucomisd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    ucomisd %xmm1, %xmm0
-; GENERIC-NEXT:    setnp %al
-; GENERIC-NEXT:    sete %cl
-; GENERIC-NEXT:    andb %al, %cl
-; GENERIC-NEXT:    ucomisd (%rdi), %xmm0
-; GENERIC-NEXT:    setnp %al
-; GENERIC-NEXT:    sete %dl
-; GENERIC-NEXT:    andb %al, %dl
-; GENERIC-NEXT:    orb %cl, %dl
-; GENERIC-NEXT:    movzbl %dl, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
+; GENERIC-NEXT:    sete %cl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
+; GENERIC-NEXT:    ucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
+; GENERIC-NEXT:    sete %dl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
+; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
+; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_ucomisd:
 ; ATOM:       # BB#0:
@@ -6729,10 +6729,10 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2
 define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_unpckhpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; GENERIC-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; GENERIC-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_unpckhpd:
 ; ATOM:       # BB#0:
@@ -6785,12 +6785,12 @@ define <2 x double> @test_unpckhpd(<2 x
 define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_unpcklpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; GENERIC-NEXT:    movapd %xmm0, %xmm1
-; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; GENERIC-NEXT:    addpd %xmm0, %xmm1
-; GENERIC-NEXT:    movapd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; GENERIC-NEXT:    movapd %xmm0, %xmm1 # sched: [1:1.00]
+; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_unpcklpd:
 ; ATOM:       # BB#0:
@@ -6847,10 +6847,10 @@ define <2 x double> @test_unpcklpd(<2 x
 define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_xorpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    xorpd %xmm1, %xmm0
-; GENERIC-NEXT:    xorpd (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_xorpd:
 ; ATOM:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
@@ -12,9 +12,9 @@
 define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_addsubpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    addsubpd %xmm1, %xmm0
-; GENERIC-NEXT:    addsubpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_addsubpd:
 ; ATOM:       # BB#0:
@@ -61,9 +61,9 @@ declare <2 x double> @llvm.x86.sse3.adds
 define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_addsubps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    addsubps %xmm1, %xmm0
-; GENERIC-NEXT:    addsubps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_addsubps:
 ; ATOM:       # BB#0:
@@ -110,9 +110,9 @@ declare <4 x float> @llvm.x86.sse3.addsu
 define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_haddpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    haddpd %xmm1, %xmm0
-; GENERIC-NEXT:    haddpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; GENERIC-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_haddpd:
 ; ATOM:       # BB#0:
@@ -159,9 +159,9 @@ declare <2 x double> @llvm.x86.sse3.hadd
 define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_haddps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    haddps %xmm1, %xmm0
-; GENERIC-NEXT:    haddps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
+; GENERIC-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_haddps:
 ; ATOM:       # BB#0:
@@ -208,9 +208,9 @@ declare <4 x float> @llvm.x86.sse3.hadd.
 define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_hsubpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    hsubpd %xmm1, %xmm0
-; GENERIC-NEXT:    hsubpd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; GENERIC-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_hsubpd:
 ; ATOM:       # BB#0:
@@ -257,9 +257,9 @@ declare <2 x double> @llvm.x86.sse3.hsub
 define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_hsubps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    hsubps %xmm1, %xmm0
-; GENERIC-NEXT:    hsubps (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; GENERIC-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_hsubps:
 ; ATOM:       # BB#0:
@@ -306,8 +306,8 @@ declare <4 x float> @llvm.x86.sse3.hsub.
 define <16 x i8> @test_lddqu(i8* %a0) {
 ; GENERIC-LABEL: test_lddqu:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    lddqu (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_lddqu:
 ; ATOM:       # BB#0:
@@ -348,10 +348,10 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(
 define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_movddup:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0]
-; GENERIC-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; GENERIC-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movddup:
 ; ATOM:       # BB#0:
@@ -405,10 +405,10 @@ define <2 x double> @test_movddup(<2 x d
 define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_movshdup:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; GENERIC-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3]
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; GENERIC-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movshdup:
 ; ATOM:       # BB#0:
@@ -462,10 +462,10 @@ define <4 x float> @test_movshdup(<4 x f
 define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_movsldup:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; GENERIC-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2]
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; GENERIC-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movsldup:
 ; ATOM:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
@@ -11,10 +11,10 @@
 define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_blendpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_blendpd:
 ; SLM:       # BB#0:
@@ -60,9 +60,9 @@ define <2 x double> @test_blendpd(<2 x d
 define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_blendps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
-; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
+; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_blendps:
 ; SLM:       # BB#0:
@@ -102,12 +102,12 @@ define <4 x float> @test_blendps(<4 x fl
 define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) {
 ; GENERIC-LABEL: test_blendvpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movapd %xmm0, %xmm3
-; GENERIC-NEXT:    movaps %xmm2, %xmm0
-; GENERIC-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; GENERIC-NEXT:    blendvpd %xmm0, (%rdi), %xmm3
-; GENERIC-NEXT:    movapd %xmm3, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
+; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; GENERIC-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; GENERIC-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_blendvpd:
 ; SLM:       # BB#0:
@@ -151,12 +151,12 @@ declare <2 x double> @llvm.x86.sse41.ble
 define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) {
 ; GENERIC-LABEL: test_blendvps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movaps %xmm0, %xmm3
-; GENERIC-NEXT:    movaps %xmm2, %xmm0
-; GENERIC-NEXT:    blendvps %xmm0, %xmm1, %xmm3
-; GENERIC-NEXT:    blendvps %xmm0, (%rdi), %xmm3
-; GENERIC-NEXT:    movaps %xmm3, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
+; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; GENERIC-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_blendvps:
 ; SLM:       # BB#0:
@@ -200,9 +200,9 @@ declare <4 x float> @llvm.x86.sse41.blen
 define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_dppd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    dppd $7, %xmm1, %xmm0
-; GENERIC-NEXT:    dppd $7, (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_dppd:
 ; SLM:       # BB#0:
@@ -243,9 +243,9 @@ declare <2 x double> @llvm.x86.sse41.dpp
 define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_dpps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    dpps $7, %xmm1, %xmm0
-; GENERIC-NEXT:    dpps $7, (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
+; GENERIC-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_dpps:
 ; SLM:       # BB#0:
@@ -286,9 +286,9 @@ declare <4 x float> @llvm.x86.sse41.dpps
 define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) {
 ; GENERIC-LABEL: test_insertps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3]
-; GENERIC-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; GENERIC-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_insertps:
 ; SLM:       # BB#0:
@@ -329,8 +329,8 @@ declare <4 x float> @llvm.x86.sse41.inse
 define <2 x i64> @test_movntdqa(i8* %a0) {
 ; GENERIC-LABEL: test_movntdqa:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movntdqa (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_movntdqa:
 ; SLM:       # BB#0:
@@ -364,9 +364,9 @@ declare <2 x i64> @llvm.x86.sse41.movntd
 define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_mpsadbw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    mpsadbw $7, %xmm1, %xmm0
-; GENERIC-NEXT:    mpsadbw $7, (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_mpsadbw:
 ; SLM:       # BB#0:
@@ -408,9 +408,9 @@ declare <8 x i16> @llvm.x86.sse41.mpsadb
 define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_packusdw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    packusdw %xmm1, %xmm0
-; GENERIC-NEXT:    packusdw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_packusdw:
 ; SLM:       # BB#0:
@@ -452,12 +452,12 @@ declare <8 x i16> @llvm.x86.sse41.packus
 define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> *%a3) {
 ; GENERIC-LABEL: test_pblendvb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movdqa %xmm0, %xmm3
-; GENERIC-NEXT:    movaps %xmm2, %xmm0
-; GENERIC-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
-; GENERIC-NEXT:    pblendvb %xmm0, (%rdi), %xmm3
-; GENERIC-NEXT:    movdqa %xmm3, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [8:1.00]
+; GENERIC-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [6:1.00]
+; GENERIC-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pblendvb:
 ; SLM:       # BB#0:
@@ -501,9 +501,9 @@ declare <16 x i8> @llvm.x86.sse41.pblend
 define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pblendw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; GENERIC-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7]
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; GENERIC-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pblendw:
 ; SLM:       # BB#0:
@@ -543,9 +543,9 @@ define <8 x i16> @test_pblendw(<8 x i16>
 define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_pcmpeqq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pcmpeqq %xmm1, %xmm0
-; GENERIC-NEXT:    pcmpeqq (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pcmpeqq:
 ; SLM:       # BB#0:
@@ -587,9 +587,9 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
 define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
 ; GENERIC-LABEL: test_pextrb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pextrb $3, %xmm0, %eax
-; GENERIC-NEXT:    pextrb $1, %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pextrb:
 ; SLM:       # BB#0:
@@ -630,9 +630,9 @@ define i32 @test_pextrb(<16 x i8> %a0, i
 define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_pextrd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pextrd $3, %xmm0, %eax
-; GENERIC-NEXT:    pextrd $1, %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pextrd:
 ; SLM:       # BB#0:
@@ -672,9 +672,9 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_pextrq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pextrq $1, %xmm0, %rax
-; GENERIC-NEXT:    pextrq $1, %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; GENERIC-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pextrq:
 ; SLM:       # BB#0:
@@ -714,9 +714,9 @@ define i64 @test_pextrq(<2 x i64> %a0, <
 define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
 ; GENERIC-LABEL: test_pextrw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pextrw $3, %xmm0, %eax
-; GENERIC-NEXT:    pextrw $1, %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pextrw:
 ; SLM:       # BB#0:
@@ -757,9 +757,9 @@ define i32 @test_pextrw(<8 x i16> %a0, i
 define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
 ; GENERIC-LABEL: test_phminposuw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    phminposuw (%rdi), %xmm0
-; GENERIC-NEXT:    phminposuw %xmm0, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_phminposuw:
 ; SLM:       # BB#0:
@@ -800,9 +800,9 @@ declare <8 x i16> @llvm.x86.sse41.phminp
 define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
 ; GENERIC-LABEL: test_pinsrb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pinsrb $1, %edi, %xmm0
-; GENERIC-NEXT:    pinsrb $3, (%rsi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pinsrb:
 ; SLM:       # BB#0:
@@ -842,9 +842,9 @@ define <16 x i8> @test_pinsrb(<16 x i8>
 define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_pinsrd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pinsrd $1, %edi, %xmm0
-; GENERIC-NEXT:    pinsrd $3, (%rsi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pinsrd:
 ; SLM:       # BB#0:
@@ -884,10 +884,10 @@ define <4 x i32> @test_pinsrd(<4 x i32>
 define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
 ; GENERIC-LABEL: test_pinsrq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pinsrq $1, %rdi, %xmm0
-; GENERIC-NEXT:    pinsrq $1, (%rsi), %xmm1
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pinsrq:
 ; SLM:       # BB#0:
@@ -933,9 +933,9 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pmaxsb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmaxsb %xmm1, %xmm0
-; GENERIC-NEXT:    pmaxsb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmaxsb:
 ; SLM:       # BB#0:
@@ -976,9 +976,9 @@ declare <16 x i8> @llvm.x86.sse41.pmaxsb
 define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pmaxsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmaxsd %xmm1, %xmm0
-; GENERIC-NEXT:    pmaxsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmaxsd:
 ; SLM:       # BB#0:
@@ -1019,9 +1019,9 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd
 define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pmaxud:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmaxud %xmm1, %xmm0
-; GENERIC-NEXT:    pmaxud (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmaxud:
 ; SLM:       # BB#0:
@@ -1062,9 +1062,9 @@ declare <4 x i32> @llvm.x86.sse41.pmaxud
 define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pmaxuw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmaxuw %xmm1, %xmm0
-; GENERIC-NEXT:    pmaxuw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmaxuw:
 ; SLM:       # BB#0:
@@ -1105,9 +1105,9 @@ declare <8 x i16> @llvm.x86.sse41.pmaxuw
 define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pminsb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pminsb %xmm1, %xmm0
-; GENERIC-NEXT:    pminsb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pminsb:
 ; SLM:       # BB#0:
@@ -1148,9 +1148,9 @@ declare <16 x i8> @llvm.x86.sse41.pminsb
 define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pminsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pminsd %xmm1, %xmm0
-; GENERIC-NEXT:    pminsd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pminsd:
 ; SLM:       # BB#0:
@@ -1191,9 +1191,9 @@ declare <4 x i32> @llvm.x86.sse41.pminsd
 define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pminud:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pminud %xmm1, %xmm0
-; GENERIC-NEXT:    pminud (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pminud:
 ; SLM:       # BB#0:
@@ -1234,9 +1234,9 @@ declare <4 x i32> @llvm.x86.sse41.pminud
 define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pminuw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pminuw %xmm1, %xmm0
-; GENERIC-NEXT:    pminuw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pminuw:
 ; SLM:       # BB#0:
@@ -1277,10 +1277,10 @@ declare <8 x i16> @llvm.x86.sse41.pminuw
 define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
 ; GENERIC-LABEL: test_pmovsxbw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovsxbw %xmm0, %xmm1
-; GENERIC-NEXT:    pmovsxbw (%rdi), %xmm0
-; GENERIC-NEXT:    paddw %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovsxbw:
 ; SLM:       # BB#0:
@@ -1328,10 +1328,10 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
 ; GENERIC-LABEL: test_pmovsxbd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovsxbd %xmm0, %xmm1
-; GENERIC-NEXT:    pmovsxbd (%rdi), %xmm0
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovsxbd:
 ; SLM:       # BB#0:
@@ -1379,10 +1379,10 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
 ; GENERIC-LABEL: test_pmovsxbq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovsxbq %xmm0, %xmm1
-; GENERIC-NEXT:    pmovsxbq (%rdi), %xmm0
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovsxbq:
 ; SLM:       # BB#0:
@@ -1430,10 +1430,10 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
 ; GENERIC-LABEL: test_pmovsxdq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovsxdq %xmm0, %xmm1
-; GENERIC-NEXT:    pmovsxdq (%rdi), %xmm0
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovsxdq:
 ; SLM:       # BB#0:
@@ -1481,10 +1481,10 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
 ; GENERIC-LABEL: test_pmovsxwd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovsxwd %xmm0, %xmm1
-; GENERIC-NEXT:    pmovsxwd (%rdi), %xmm0
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovsxwd:
 ; SLM:       # BB#0:
@@ -1532,10 +1532,10 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
 ; GENERIC-LABEL: test_pmovsxwq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovsxwq %xmm0, %xmm1
-; GENERIC-NEXT:    pmovsxwq (%rdi), %xmm0
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovsxwq:
 ; SLM:       # BB#0:
@@ -1583,10 +1583,10 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
 ; GENERIC-LABEL: test_pmovzxbw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; GENERIC-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; GENERIC-NEXT:    paddw %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; GENERIC-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovzxbw:
 ; SLM:       # BB#0:
@@ -1634,10 +1634,10 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
 ; GENERIC-LABEL: test_pmovzxbd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; GENERIC-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; GENERIC-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovzxbd:
 ; SLM:       # BB#0:
@@ -1685,10 +1685,10 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
 ; GENERIC-LABEL: test_pmovzxbq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; GENERIC-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; GENERIC-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovzxbq:
 ; SLM:       # BB#0:
@@ -1736,10 +1736,10 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
 ; GENERIC-LABEL: test_pmovzxdq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
-; GENERIC-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; GENERIC-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovzxdq:
 ; SLM:       # BB#0:
@@ -1787,10 +1787,10 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
 ; GENERIC-LABEL: test_pmovzxwd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; GENERIC-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; GENERIC-NEXT:    paddd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; GENERIC-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
+; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovzxwd:
 ; SLM:       # BB#0:
@@ -1838,10 +1838,10 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
 ; GENERIC-LABEL: test_pmovzxwq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; GENERIC-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
-; GENERIC-NEXT:    paddq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; GENERIC-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
+; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmovzxwq:
 ; SLM:       # BB#0:
@@ -1889,9 +1889,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pmuldq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmuldq %xmm1, %xmm0
-; GENERIC-NEXT:    pmuldq (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmuldq %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    pmuldq (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmuldq:
 ; SLM:       # BB#0:
@@ -1933,9 +1933,9 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq
 define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_pmulld:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmulld %xmm1, %xmm0
-; GENERIC-NEXT:    pmulld (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmulld %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    pmulld (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pmulld:
 ; SLM:       # BB#0:
@@ -1975,13 +1975,13 @@ define <4 x i32> @test_pmulld(<4 x i32>
 define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_ptest:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    ptest %xmm1, %xmm0
-; GENERIC-NEXT:    setb %al
-; GENERIC-NEXT:    ptest (%rdi), %xmm0
-; GENERIC-NEXT:    setb %cl
-; GENERIC-NEXT:    andb %al, %cl
-; GENERIC-NEXT:    movzbl %cl, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    setb %cl # sched: [1:1.00]
+; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
+; GENERIC-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_ptest:
 ; SLM:       # BB#0:
@@ -2043,10 +2043,10 @@ declare i32 @llvm.x86.sse41.ptestc(<2 x
 define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
 ; GENERIC-LABEL: test_roundpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    roundpd $7, %xmm0, %xmm1
-; GENERIC-NEXT:    roundpd $7, (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_roundpd:
 ; SLM:       # BB#0:
@@ -2094,10 +2094,10 @@ declare <2 x double> @llvm.x86.sse41.rou
 define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
 ; GENERIC-LABEL: test_roundps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    roundps $7, %xmm0, %xmm1
-; GENERIC-NEXT:    roundps $7, (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; GENERIC-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_roundps:
 ; SLM:       # BB#0:
@@ -2145,11 +2145,11 @@ declare <4 x float> @llvm.x86.sse41.roun
 define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_roundsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movaps %xmm0, %xmm2
-; GENERIC-NEXT:    roundsd $7, %xmm1, %xmm2
-; GENERIC-NEXT:    roundsd $7, (%rdi), %xmm0
-; GENERIC-NEXT:    addpd %xmm2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
+; GENERIC-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; GENERIC-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_roundsd:
 ; SLM:       # BB#0:
@@ -2197,11 +2197,11 @@ declare <2 x double> @llvm.x86.sse41.rou
 define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_roundss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movaps %xmm0, %xmm2
-; GENERIC-NEXT:    roundss $7, %xmm1, %xmm2
-; GENERIC-NEXT:    roundss $7, (%rdi), %xmm0
-; GENERIC-NEXT:    addps %xmm2, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
+; GENERIC-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; GENERIC-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_roundss:
 ; SLM:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
@@ -11,10 +11,10 @@
 define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
 ; GENERIC-LABEL: crc32_32_8:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    crc32b %sil, %edi
-; GENERIC-NEXT:    crc32b (%rdx), %edi
-; GENERIC-NEXT:    movl %edi, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; GENERIC-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_32_8:
 ; SLM:       # BB#0:
@@ -60,10 +60,10 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i
 define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
 ; GENERIC-LABEL: crc32_32_16:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    crc32w %si, %edi
-; GENERIC-NEXT:    crc32w (%rdx), %edi
-; GENERIC-NEXT:    movl %edi, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    crc32w %si, %edi # sched: [3:1.00]
+; GENERIC-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
+; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_32_16:
 ; SLM:       # BB#0:
@@ -109,10 +109,10 @@ declare i32 @llvm.x86.sse42.crc32.32.16(
 define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: crc32_32_32:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    crc32l %esi, %edi
-; GENERIC-NEXT:    crc32l (%rdx), %edi
-; GENERIC-NEXT:    movl %edi, %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
+; GENERIC-NEXT:    crc32l (%rdx), %edi # sched: [7:1.00]
+; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_32_32:
 ; SLM:       # BB#0:
@@ -158,10 +158,10 @@ declare i32 @llvm.x86.sse42.crc32.32.32(
 define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
 ; GENERIC-LABEL: crc32_64_8:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    crc32b %sil, %edi
-; GENERIC-NEXT:    crc32b (%rdx), %edi
-; GENERIC-NEXT:    movq %rdi, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
+; GENERIC-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
+; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_64_8:
 ; SLM:       # BB#0:
@@ -207,10 +207,10 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i
 define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: crc32_64_64:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    crc32q %rsi, %rdi
-; GENERIC-NEXT:    crc32q (%rdx), %rdi
-; GENERIC-NEXT:    movq %rdi, %rax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
+; GENERIC-NEXT:    crc32q (%rdx), %rdi # sched: [7:1.00]
+; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_64_64:
 ; SLM:       # BB#0:
@@ -256,16 +256,16 @@ declare i64 @llvm.x86.sse42.crc32.64.64(
 define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pcmpestri:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movl $7, %eax
-; GENERIC-NEXT:    movl $7, %edx
-; GENERIC-NEXT:    pcmpestri $7, %xmm1, %xmm0
-; GENERIC-NEXT:    movl %ecx, %esi
-; GENERIC-NEXT:    movl $7, %eax
-; GENERIC-NEXT:    movl $7, %edx
-; GENERIC-NEXT:    pcmpestri $7, (%rdi), %xmm0
+; GENERIC-NEXT:    movl $7, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    movl $7, %edx # sched: [1:0.33]
+; GENERIC-NEXT:    pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
+; GENERIC-NEXT:    movl %ecx, %esi # sched: [1:0.33]
+; GENERIC-NEXT:    movl $7, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    movl $7, %edx # sched: [1:0.33]
+; GENERIC-NEXT:    pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
 ; GENERIC-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
-; GENERIC-NEXT:    leal (%rcx,%rsi), %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pcmpestri:
 ; SLM:       # BB#0:
@@ -342,13 +342,13 @@ declare i32 @llvm.x86.sse42.pcmpestri128
 define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pcmpestrm:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movl $7, %eax
-; GENERIC-NEXT:    movl $7, %edx
-; GENERIC-NEXT:    pcmpestrm $7, %xmm1, %xmm0
-; GENERIC-NEXT:    movl $7, %eax
-; GENERIC-NEXT:    movl $7, %edx
-; GENERIC-NEXT:    pcmpestrm $7, (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movl $7, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    movl $7, %edx # sched: [1:0.33]
+; GENERIC-NEXT:    pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
+; GENERIC-NEXT:    movl $7, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    movl $7, %edx # sched: [1:0.33]
+; GENERIC-NEXT:    pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pcmpestrm:
 ; SLM:       # BB#0:
@@ -409,12 +409,12 @@ declare <16 x i8> @llvm.x86.sse42.pcmpes
 define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pcmpistri:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pcmpistri $7, %xmm1, %xmm0
-; GENERIC-NEXT:    movl %ecx, %eax
-; GENERIC-NEXT:    pcmpistri $7, (%rdi), %xmm0
+; GENERIC-NEXT:    pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; GENERIC-NEXT:    movl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
 ; GENERIC-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
-; GENERIC-NEXT:    leal (%rcx,%rax), %eax
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pcmpistri:
 ; SLM:       # BB#0:
@@ -471,9 +471,9 @@ declare i32 @llvm.x86.sse42.pcmpistri128
 define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pcmpistrm:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pcmpistrm $7, %xmm1, %xmm0
-; GENERIC-NEXT:    pcmpistrm $7, (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; GENERIC-NEXT:    pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pcmpistrm:
 ; SLM:       # BB#0:
@@ -514,9 +514,9 @@ declare <16 x i8> @llvm.x86.sse42.pcmpis
 define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; GENERIC-LABEL: test_pcmpgtq:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pcmpgtq %xmm1, %xmm0
-; GENERIC-NEXT:    pcmpgtq (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pcmpgtq:
 ; SLM:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1
 
@@ -7,7 +7,7 @@ define <2 x i64> @test_extrq(<2 x i64> %
 ; GENERIC-LABEL: test_extrq:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    extrq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: test_extrq:
 ; BTVER2:       # BB#0:
@@ -27,7 +27,7 @@ define <2 x i64> @test_extrqi(<2 x i64>
 ; GENERIC-LABEL: test_extrqi:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    extrq $2, $3, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: test_extrqi:
 ; BTVER2:       # BB#0:
@@ -47,7 +47,7 @@ define <2 x i64> @test_insertq(<2 x i64>
 ; GENERIC-LABEL: test_insertq:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    insertq %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: test_insertq:
 ; BTVER2:       # BB#0:
@@ -67,7 +67,7 @@ define <2 x i64> @test_insertqi(<2 x i64
 ; GENERIC-LABEL: test_insertqi:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    insertq $6, $5, %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: test_insertqi:
 ; BTVER2:       # BB#0:
@@ -86,8 +86,8 @@ declare <2 x i64> @llvm.x86.sse4a.insert
 define void @test_movntsd(i8* %p, <2 x double> %a) {
 ; GENERIC-LABEL: test_movntsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movntsd %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movntsd %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: test_movntsd:
 ; BTVER2:       # BB#0:
@@ -106,8 +106,8 @@ declare void @llvm.x86.sse4a.movnt.sd(i8
 define void @test_movntss(i8* %p, <4 x float> %a) {
 ; GENERIC-LABEL: test_movntss:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movntss %xmm0, (%rdi)
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    movntss %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: test_movntss:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll?rev=309691&r1=309690&r2=309691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll Tue Aug  1 08:14:35 2017
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
@@ -12,10 +12,10 @@
 define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
 ; GENERIC-LABEL: test_pabsb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pabsb %xmm0, %xmm1
-; GENERIC-NEXT:    pabsb (%rdi), %xmm0
-; GENERIC-NEXT:    por %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsb:
 ; ATOM:       # BB#0:
@@ -70,10 +70,10 @@ declare <16 x i8> @llvm.x86.ssse3.pabs.b
 define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
 ; GENERIC-LABEL: test_pabsd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pabsd %xmm0, %xmm1
-; GENERIC-NEXT:    pabsd (%rdi), %xmm0
-; GENERIC-NEXT:    por %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; GENERIC-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsd:
 ; ATOM:       # BB#0:
@@ -128,8 +128,8 @@ declare <4 x i32> @llvm.x86.ssse3.pabs.d
 define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
 ; GENERIC-LABEL: test_pabsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pabsw %xmm0, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pabsw %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsw:
 ; ATOM:       # BB#0:
@@ -177,10 +177,10 @@ declare <8 x i16> @llvm.x86.ssse3.pabs.w
 define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_palignr:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
-; GENERIC-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
-; GENERIC-NEXT:    movdqa %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
+; GENERIC-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
+; GENERIC-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_palignr:
 ; ATOM:       # BB#0:
@@ -230,9 +230,9 @@ define <8 x i16> @test_palignr(<8 x i16>
 define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_phaddd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    phaddd %xmm1, %xmm0
-; GENERIC-NEXT:    phaddd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:1.50]
+; GENERIC-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:1.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddd:
 ; ATOM:       # BB#0:
@@ -279,9 +279,9 @@ declare <4 x i32> @llvm.x86.ssse3.phadd.
 define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_phaddsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    phaddsw %xmm1, %xmm0
-; GENERIC-NEXT:    phaddsw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:1.50]
+; GENERIC-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:1.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddsw:
 ; ATOM:       # BB#0:
@@ -336,9 +336,9 @@ declare <8 x i16> @llvm.x86.ssse3.phadd.
 define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_phaddw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    phaddw %xmm1, %xmm0
-; GENERIC-NEXT:    phaddw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:1.50]
+; GENERIC-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:1.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddw:
 ; ATOM:       # BB#0:
@@ -385,9 +385,9 @@ declare <8 x i16> @llvm.x86.ssse3.phadd.
 define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_phsubd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    phsubd %xmm1, %xmm0
-; GENERIC-NEXT:    phsubd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:1.50]
+; GENERIC-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:1.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubd:
 ; ATOM:       # BB#0:
@@ -434,9 +434,9 @@ declare <4 x i32> @llvm.x86.ssse3.phsub.
 define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_phsubsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    phsubsw %xmm1, %xmm0
-; GENERIC-NEXT:    phsubsw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:1.50]
+; GENERIC-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:1.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubsw:
 ; ATOM:       # BB#0:
@@ -491,9 +491,9 @@ declare <8 x i16> @llvm.x86.ssse3.phsub.
 define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_phsubw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    phsubw %xmm1, %xmm0
-; GENERIC-NEXT:    phsubw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:1.50]
+; GENERIC-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:1.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubw:
 ; ATOM:       # BB#0:
@@ -540,9 +540,9 @@ declare <8 x i16> @llvm.x86.ssse3.phsub.
 define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pmaddubsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmaddubsw %xmm1, %xmm0
-; GENERIC-NEXT:    pmaddubsw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaddubsw:
 ; ATOM:       # BB#0:
@@ -590,8 +590,8 @@ declare <8 x i16> @llvm.x86.ssse3.pmadd.
 define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_pmulhrsw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pmulhrsw %xmm1, %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhrsw:
 ; ATOM:       # BB#0:
@@ -632,9 +632,9 @@ declare <8 x i16> @llvm.x86.ssse3.pmul.h
 define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_pshufb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    pshufb %xmm1, %xmm0
-; GENERIC-NEXT:    pshufb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pshufb:
 ; ATOM:       # BB#0:
@@ -681,9 +681,9 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.
 define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ; GENERIC-LABEL: test_psignb:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psignb %xmm1, %xmm0
-; GENERIC-NEXT:    psignb (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignb:
 ; ATOM:       # BB#0:
@@ -738,9 +738,9 @@ declare <16 x i8> @llvm.x86.ssse3.psign.
 define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ; GENERIC-LABEL: test_psignd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psignd %xmm1, %xmm0
-; GENERIC-NEXT:    psignd (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignd:
 ; ATOM:       # BB#0:
@@ -795,9 +795,9 @@ declare <4 x i32> @llvm.x86.ssse3.psign.
 define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ; GENERIC-LABEL: test_psignw:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    psignw %xmm1, %xmm0
-; GENERIC-NEXT:    psignw (%rdi), %xmm0
-; GENERIC-NEXT:    retq
+; GENERIC-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignw:
 ; ATOM:       # BB#0:




More information about the llvm-commits mailing list