[llvm] r311879 - [X86][Haswell] Updating HSW instruction scheduling information

Gadi Haber via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 28 03:04:17 PDT 2017


Modified: llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi-schedule.ll Mon Aug 28 03:04:16 2017
@@ -20,10 +20,10 @@ define i16 @test_andn_i16(i16 zeroext %a
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    notl %edi # sched: [1:0.25]
-; HASWELL-NEXT:    andw (%rdx), %di # sched: [5:0.50]
+; HASWELL-NEXT:    andw (%rdx), %di # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %edi, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_andn_i16:
 ; BTVER2:       # BB#0:
@@ -61,9 +61,9 @@ define i32 @test_andn_i32(i32 %a0, i32 %
 ; HASWELL-LABEL: test_andn_i32:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT:    andnl (%rdx), %edi, %eax # sched: [4:0.50]
+; HASWELL-NEXT:    andnl (%rdx), %edi, %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_andn_i32:
 ; BTVER2:       # BB#0:
@@ -97,9 +97,9 @@ define i64 @test_andn_i64(i64 %a0, i64 %
 ; HASWELL-LABEL: test_andn_i64:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT:    andnq (%rdx), %rdi, %rax # sched: [4:0.50]
+; HASWELL-NEXT:    andnq (%rdx), %rdi, %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_andn_i64:
 ; BTVER2:       # BB#0:
@@ -132,10 +132,10 @@ define i32 @test_bextr_i32(i32 %a0, i32
 ;
 ; HASWELL-LABEL: test_bextr_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [6:0.50]
+; HASWELL-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [2:0.50]
 ; HASWELL-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_bextr_i32:
 ; BTVER2:       # BB#0:
@@ -168,10 +168,10 @@ define i64 @test_bextr_i64(i64 %a0, i64
 ;
 ; HASWELL-LABEL: test_bextr_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [6:0.50]
+; HASWELL-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [2:0.50]
 ; HASWELL-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_bextr_i64:
 ; BTVER2:       # BB#0:
@@ -204,10 +204,10 @@ define i32 @test_blsi_i32(i32 %a0, i32 *
 ;
 ; HASWELL-LABEL: test_blsi_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    blsil (%rsi), %ecx # sched: [4:0.50]
+; HASWELL-NEXT:    blsil (%rsi), %ecx # sched: [1:0.50]
 ; HASWELL-NEXT:    blsil %edi, %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blsi_i32:
 ; BTVER2:       # BB#0:
@@ -241,10 +241,10 @@ define i64 @test_blsi_i64(i64 %a0, i64 *
 ;
 ; HASWELL-LABEL: test_blsi_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    blsiq (%rsi), %rcx # sched: [4:0.50]
+; HASWELL-NEXT:    blsiq (%rsi), %rcx # sched: [1:0.50]
 ; HASWELL-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blsi_i64:
 ; BTVER2:       # BB#0:
@@ -278,10 +278,10 @@ define i32 @test_blsmsk_i32(i32 %a0, i32
 ;
 ; HASWELL-LABEL: test_blsmsk_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    blsmskl (%rsi), %ecx # sched: [4:0.50]
+; HASWELL-NEXT:    blsmskl (%rsi), %ecx # sched: [1:0.50]
 ; HASWELL-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blsmsk_i32:
 ; BTVER2:       # BB#0:
@@ -315,10 +315,10 @@ define i64 @test_blsmsk_i64(i64 %a0, i64
 ;
 ; HASWELL-LABEL: test_blsmsk_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    blsmskq (%rsi), %rcx # sched: [4:0.50]
+; HASWELL-NEXT:    blsmskq (%rsi), %rcx # sched: [1:0.50]
 ; HASWELL-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blsmsk_i64:
 ; BTVER2:       # BB#0:
@@ -352,10 +352,10 @@ define i32 @test_blsr_i32(i32 %a0, i32 *
 ;
 ; HASWELL-LABEL: test_blsr_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    blsrl (%rsi), %ecx # sched: [4:0.50]
+; HASWELL-NEXT:    blsrl (%rsi), %ecx # sched: [1:0.50]
 ; HASWELL-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blsr_i32:
 ; BTVER2:       # BB#0:
@@ -389,10 +389,10 @@ define i64 @test_blsr_i64(i64 %a0, i64 *
 ;
 ; HASWELL-LABEL: test_blsr_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    blsrq (%rsi), %rcx # sched: [4:0.50]
+; HASWELL-NEXT:    blsrq (%rsi), %rcx # sched: [1:0.50]
 ; HASWELL-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blsr_i64:
 ; BTVER2:       # BB#0:
@@ -427,11 +427,11 @@ define i16 @test_cttz_i16(i16 zeroext %a
 ;
 ; HASWELL-LABEL: test_cttz_i16:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    tzcntw (%rsi), %cx # sched: [7:1.00]
+; HASWELL-NEXT:    tzcntw (%rsi), %cx # sched: [3:1.00]
 ; HASWELL-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cttz_i16:
 ; BTVER2:       # BB#0:
@@ -466,10 +466,10 @@ define i32 @test_cttz_i32(i32 %a0, i32 *
 ;
 ; HASWELL-LABEL: test_cttz_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    tzcntl (%rsi), %ecx # sched: [7:1.00]
+; HASWELL-NEXT:    tzcntl (%rsi), %ecx # sched: [3:1.00]
 ; HASWELL-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cttz_i32:
 ; BTVER2:       # BB#0:
@@ -502,10 +502,10 @@ define i64 @test_cttz_i64(i64 %a0, i64 *
 ;
 ; HASWELL-LABEL: test_cttz_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    tzcntq (%rsi), %rcx # sched: [7:1.00]
+; HASWELL-NEXT:    tzcntq (%rsi), %rcx # sched: [3:1.00]
 ; HASWELL-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
 ; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cttz_i64:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll Mon Aug 28 03:04:16 2017
@@ -15,10 +15,10 @@ define i32 @test_bzhi_i32(i32 %a0, i32 %
 ;
 ; HASWELL-LABEL: test_bzhi_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    bzhil %edi, (%rdx), %ecx # sched: [4:0.50]
+; HASWELL-NEXT:    bzhil %edi, (%rdx), %ecx # sched: [1:0.50]
 ; HASWELL-NEXT:    bzhil %edi, %esi, %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_bzhi_i32:
 ; ZNVER1:       # BB#0:
@@ -44,10 +44,10 @@ define i64 @test_bzhi_i64(i64 %a0, i64 %
 ;
 ; HASWELL-LABEL: test_bzhi_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    bzhiq %rdi, (%rdx), %rcx # sched: [4:0.50]
+; HASWELL-NEXT:    bzhiq %rdi, (%rdx), %rcx # sched: [1:0.50]
 ; HASWELL-NEXT:    bzhiq %rdi, %rsi, %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_bzhi_i64:
 ; ZNVER1:       # BB#0:
@@ -80,9 +80,9 @@ define i64 @test_mulx_i64(i64 %a0, i64 %
 ; HASWELL-NEXT:    movq %rdx, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    movq %rdi, %rdx # sched: [1:0.25]
 ; HASWELL-NEXT:    mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; HASWELL-NEXT:    mulxq (%rax), %rdx, %rax # sched: [8:1.00]
+; HASWELL-NEXT:    mulxq (%rax), %rdx, %rax # sched: [4:1.00]
 ; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_mulx_i64:
 ; ZNVER1:       # BB#0:
@@ -116,10 +116,10 @@ define i32 @test_pdep_i32(i32 %a0, i32 %
 ;
 ; HASWELL-LABEL: test_pdep_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    pdepl (%rdx), %edi, %ecx # sched: [7:1.00]
+; HASWELL-NEXT:    pdepl (%rdx), %edi, %ecx # sched: [3:1.00]
 ; HASWELL-NEXT:    pdepl %esi, %edi, %eax # sched: [3:1.00]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_pdep_i32:
 ; ZNVER1:       # BB#0:
@@ -145,10 +145,10 @@ define i64 @test_pdep_i64(i64 %a0, i64 %
 ;
 ; HASWELL-LABEL: test_pdep_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    pdepq (%rdx), %rdi, %rcx # sched: [7:1.00]
+; HASWELL-NEXT:    pdepq (%rdx), %rdi, %rcx # sched: [3:1.00]
 ; HASWELL-NEXT:    pdepq %rsi, %rdi, %rax # sched: [3:1.00]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_pdep_i64:
 ; ZNVER1:       # BB#0:
@@ -174,10 +174,10 @@ define i32 @test_pext_i32(i32 %a0, i32 %
 ;
 ; HASWELL-LABEL: test_pext_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    pextl (%rdx), %edi, %ecx # sched: [7:1.00]
+; HASWELL-NEXT:    pextl (%rdx), %edi, %ecx # sched: [3:1.00]
 ; HASWELL-NEXT:    pextl %esi, %edi, %eax # sched: [3:1.00]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_pext_i32:
 ; ZNVER1:       # BB#0:
@@ -203,10 +203,10 @@ define i64 @test_pext_i64(i64 %a0, i64 %
 ;
 ; HASWELL-LABEL: test_pext_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    pextq (%rdx), %rdi, %rcx # sched: [7:1.00]
+; HASWELL-NEXT:    pextq (%rdx), %rdi, %rcx # sched: [3:1.00]
 ; HASWELL-NEXT:    pextq %rsi, %rdi, %rax # sched: [3:1.00]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_pext_i64:
 ; ZNVER1:       # BB#0:
@@ -233,9 +233,9 @@ define i32 @test_rorx_i32(i32 %a0, i32 %
 ; HASWELL-LABEL: test_rorx_i32:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    rorxl $5, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT:    rorxl $5, (%rdx), %eax # sched: [5:0.50]
+; HASWELL-NEXT:    rorxl $5, (%rdx), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_rorx_i32:
 ; ZNVER1:       # BB#0:
@@ -265,9 +265,9 @@ define i64 @test_rorx_i64(i64 %a0, i64 %
 ; HASWELL-LABEL: test_rorx_i64:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    rorxq $5, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT:    rorxq $5, (%rdx), %rax # sched: [5:0.50]
+; HASWELL-NEXT:    rorxq $5, (%rdx), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_rorx_i64:
 ; ZNVER1:       # BB#0:
@@ -297,9 +297,9 @@ define i32 @test_sarx_i32(i32 %a0, i32 %
 ; HASWELL-LABEL: test_sarx_i32:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    sarxl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT:    sarxl %esi, (%rdx), %eax # sched: [5:0.50]
+; HASWELL-NEXT:    sarxl %esi, (%rdx), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sarx_i32:
 ; ZNVER1:       # BB#0:
@@ -325,9 +325,9 @@ define i64 @test_sarx_i64(i64 %a0, i64 %
 ; HASWELL-LABEL: test_sarx_i64:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT:    sarxq %rsi, (%rdx), %rax # sched: [5:0.50]
+; HASWELL-NEXT:    sarxq %rsi, (%rdx), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sarx_i64:
 ; ZNVER1:       # BB#0:
@@ -353,9 +353,9 @@ define i32 @test_shlx_i32(i32 %a0, i32 %
 ; HASWELL-LABEL: test_shlx_i32:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    shlxl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT:    shlxl %esi, (%rdx), %eax # sched: [5:0.50]
+; HASWELL-NEXT:    shlxl %esi, (%rdx), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_shlx_i32:
 ; ZNVER1:       # BB#0:
@@ -381,9 +381,9 @@ define i64 @test_shlx_i64(i64 %a0, i64 %
 ; HASWELL-LABEL: test_shlx_i64:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT:    shlxq %rsi, (%rdx), %rax # sched: [5:0.50]
+; HASWELL-NEXT:    shlxq %rsi, (%rdx), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_shlx_i64:
 ; ZNVER1:       # BB#0:
@@ -409,9 +409,9 @@ define i32 @test_shrx_i32(i32 %a0, i32 %
 ; HASWELL-LABEL: test_shrx_i32:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    shrxl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT:    shrxl %esi, (%rdx), %eax # sched: [5:0.50]
+; HASWELL-NEXT:    shrxl %esi, (%rdx), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_shrx_i32:
 ; ZNVER1:       # BB#0:
@@ -437,9 +437,9 @@ define i64 @test_shrx_i64(i64 %a0, i64 %
 ; HASWELL-LABEL: test_shrx_i64:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT:    shrxq %rsi, (%rdx), %rax # sched: [5:0.50]
+; HASWELL-NEXT:    shrxq %rsi, (%rdx), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_shrx_i64:
 ; ZNVER1:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll Mon Aug 28 03:04:16 2017
@@ -23,10 +23,10 @@ define <4 x float> @test_vcvtph2ps_128(<
 ;
 ; HASWELL-LABEL: test_vcvtph2ps_128:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
-; HASWELL-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [1:1.00]
+; HASWELL-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_vcvtph2ps_128:
 ; BTVER2:       # BB#0:
@@ -66,10 +66,10 @@ define <8 x float> @test_vcvtph2ps_256(<
 ;
 ; HASWELL-LABEL: test_vcvtph2ps_256:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
-; HASWELL-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [4:1.00]
+; HASWELL-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [1:1.00]
+; HASWELL-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_vcvtph2ps_256:
 ; BTVER2:       # BB#0:
@@ -108,8 +108,8 @@ define <8 x i16> @test_vcvtps2ph_128(<4
 ; HASWELL-LABEL: test_vcvtps2ph_128:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_vcvtps2ph_128:
 ; BTVER2:       # BB#0:
@@ -147,10 +147,10 @@ define <8 x i16> @test_vcvtps2ph_256(<8
 ;
 ; HASWELL-LABEL: test_vcvtps2ph_256:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
-; HASWELL-NEXT:    vzeroupper # sched: [1:?]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
+; HASWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [6:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_vcvtps2ph_256:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/lea32-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea32-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea32-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea32-schedule.ll Mon Aug 28 03:04:16 2017
@@ -45,7 +45,7 @@ define i32 @test_lea_offset(i32) {
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HASWELL-NEXT:    leal -24(%rdi), %eax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_offset:
 ; BTVER2:       # BB#0:
@@ -97,7 +97,7 @@ define i32 @test_lea_offset_big(i32) {
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HASWELL-NEXT:    leal 1024(%rdi), %eax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_offset_big:
 ; BTVER2:       # BB#0:
@@ -155,7 +155,7 @@ define i32 @test_lea_add(i32, i32) {
 ; HASWELL-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
 ; HASWELL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HASWELL-NEXT:    leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add:
 ; BTVER2:       # BB#0:
@@ -217,7 +217,7 @@ define i32 @test_lea_add_offset(i32, i32
 ; HASWELL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HASWELL-NEXT:    leal (%rdi,%rsi), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl $16, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_offset:
 ; BTVER2:       # BB#0:
@@ -283,7 +283,7 @@ define i32 @test_lea_add_offset_big(i32,
 ; HASWELL-NEXT:    leal (%rdi,%rsi), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl $-4096, %eax # imm = 0xF000
 ; HASWELL-NEXT:    # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_offset_big:
 ; BTVER2:       # BB#0:
@@ -338,7 +338,7 @@ define i32 @test_lea_mul(i32) {
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HASWELL-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_mul:
 ; BTVER2:       # BB#0:
@@ -393,7 +393,7 @@ define i32 @test_lea_mul_offset(i32) {
 ; HASWELL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HASWELL-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl $-32, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_mul_offset:
 ; BTVER2:       # BB#0:
@@ -452,7 +452,7 @@ define i32 @test_lea_mul_offset_big(i32)
 ; HASWELL-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl $10000, %eax # imm = 0x2710
 ; HASWELL-NEXT:    # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_mul_offset_big:
 ; BTVER2:       # BB#0:
@@ -510,7 +510,7 @@ define i32 @test_lea_add_scale(i32, i32)
 ; HASWELL-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
 ; HASWELL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HASWELL-NEXT:    leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_scale:
 ; BTVER2:       # BB#0:
@@ -573,7 +573,7 @@ define i32 @test_lea_add_scale_offset(i3
 ; HASWELL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HASWELL-NEXT:    leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl $96, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_scale_offset:
 ; BTVER2:       # BB#0:
@@ -640,7 +640,7 @@ define i32 @test_lea_add_scale_offset_bi
 ; HASWELL-NEXT:    leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    addl $-1200, %eax # imm = 0xFB50
 ; HASWELL-NEXT:    # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_scale_offset_big:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/lea64-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea64-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea64-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea64-schedule.ll Mon Aug 28 03:04:16 2017
@@ -40,7 +40,7 @@ define i64 @test_lea_offset(i64) {
 ; HASWELL-LABEL: test_lea_offset:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    leaq -24(%rdi), %rax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_offset:
 ; BTVER2:       # BB#0:
@@ -85,7 +85,7 @@ define i64 @test_lea_offset_big(i64) {
 ; HASWELL-LABEL: test_lea_offset_big:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    leaq 1024(%rdi), %rax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_offset_big:
 ; BTVER2:       # BB#0:
@@ -131,7 +131,7 @@ define i64 @test_lea_add(i64, i64) {
 ; HASWELL-LABEL: test_lea_add:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add:
 ; BTVER2:       # BB#0:
@@ -179,7 +179,7 @@ define i64 @test_lea_add_offset(i64, i64
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    leaq (%rdi,%rsi), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq $16, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_offset:
 ; BTVER2:       # BB#0:
@@ -231,7 +231,7 @@ define i64 @test_lea_add_offset_big(i64,
 ; HASWELL-NEXT:    leaq (%rdi,%rsi), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq $-4096, %rax # imm = 0xF000
 ; HASWELL-NEXT:    # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_offset_big:
 ; BTVER2:       # BB#0:
@@ -277,7 +277,7 @@ define i64 @test_lea_mul(i64) {
 ; HASWELL-LABEL: test_lea_mul:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_mul:
 ; BTVER2:       # BB#0:
@@ -325,7 +325,7 @@ define i64 @test_lea_mul_offset(i64) {
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq $-32, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_mul_offset:
 ; BTVER2:       # BB#0:
@@ -377,7 +377,7 @@ define i64 @test_lea_mul_offset_big(i64)
 ; HASWELL-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq $10000, %rax # imm = 0x2710
 ; HASWELL-NEXT:    # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_mul_offset_big:
 ; BTVER2:       # BB#0:
@@ -423,7 +423,7 @@ define i64 @test_lea_add_scale(i64, i64)
 ; HASWELL-LABEL: test_lea_add_scale:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_scale:
 ; BTVER2:       # BB#0:
@@ -472,7 +472,7 @@ define i64 @test_lea_add_scale_offset(i6
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq $96, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_scale_offset:
 ; BTVER2:       # BB#0:
@@ -525,7 +525,7 @@ define i64 @test_lea_add_scale_offset_bi
 ; HASWELL-NEXT:    leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    addq $-1200, %rax # imm = 0xFB50
 ; HASWELL-NEXT:    # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lea_add_scale_offset_big:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll Mon Aug 28 03:04:16 2017
@@ -17,11 +17,11 @@ define i16 @test_ctlz_i16(i16 zeroext %a
 ;
 ; HASWELL-LABEL: test_ctlz_i16:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    lzcntw (%rsi), %cx
-; HASWELL-NEXT:    lzcntw %di, %ax
+; HASWELL-NEXT:    lzcntw (%rsi), %cx # sched: [3:1.00]
+; HASWELL-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctlz_i16:
 ; BTVER2:       # BB#0:
@@ -56,10 +56,10 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *
 ;
 ; HASWELL-LABEL: test_ctlz_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    lzcntl (%rsi), %ecx
-; HASWELL-NEXT:    lzcntl %edi, %eax
+; HASWELL-NEXT:    lzcntl (%rsi), %ecx # sched: [3:1.00]
+; HASWELL-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctlz_i32:
 ; BTVER2:       # BB#0:
@@ -92,10 +92,10 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *
 ;
 ; HASWELL-LABEL: test_ctlz_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    lzcntq (%rsi), %rcx
-; HASWELL-NEXT:    lzcntq %rdi, %rax
+; HASWELL-NEXT:    lzcntq (%rsi), %rcx # sched: [3:1.00]
+; HASWELL-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
 ; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctlz_i64:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/movbe-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movbe-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movbe-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movbe-schedule.ll Mon Aug 28 03:04:16 2017
@@ -33,9 +33,9 @@ define i16 @test_ctlz_i16(i16 *%a0, i16
 ;
 ; HASWELL-LABEL: test_ctlz_i16:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    movbew (%rdi), %ax # sched: [6:0.50]
+; HASWELL-NEXT:    movbew (%rdi), %ax # sched: [1:0.50]
 ; HASWELL-NEXT:    movbew %si, (%rdx) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctlz_i16:
 ; BTVER2:       # BB#0:
@@ -83,7 +83,7 @@ define i32 @test_ctlz_i32(i32 *%a0, i32
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    movbel (%rdi), %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    movbel %esi, (%rdx) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctlz_i32:
 ; BTVER2:       # BB#0:
@@ -129,9 +129,9 @@ define i64 @test_ctlz_i64(i64 *%a0, i64
 ;
 ; HASWELL-LABEL: test_ctlz_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    movbeq (%rdi), %rax # sched: [6:0.50]
+; HASWELL-NEXT:    movbeq (%rdi), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    movbeq %rsi, (%rdx) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctlz_i64:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll Mon Aug 28 03:04:16 2017
@@ -17,7 +17,7 @@ define i32 @test_mul_by_1(i32 %x) {
 ; X64-HSW-LABEL: test_mul_by_1:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_1:
 ; X64-JAG:       # BB#0:
@@ -32,7 +32,7 @@ define i32 @test_mul_by_1(i32 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_1:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_1:
 ; JAG-NOOPT:       # BB#0:
@@ -63,7 +63,7 @@ define i32 @test_mul_by_2(i32 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_2:
 ; X64-JAG:       # BB#0:
@@ -81,7 +81,7 @@ define i32 @test_mul_by_2(i32 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HSW-NOOPT-NEXT:    leal (%rdi,%rdi), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_2:
 ; JAG-NOOPT:       # BB#0:
@@ -114,7 +114,7 @@ define i32 @test_mul_by_3(i32 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_3:
 ; X64-JAG:       # BB#0:
@@ -131,7 +131,7 @@ define i32 @test_mul_by_3(i32 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_3:
 ; JAG-NOOPT:       # BB#0:
@@ -165,7 +165,7 @@ define i32 @test_mul_by_4(i32 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_4:
 ; X64-JAG:       # BB#0:
@@ -183,7 +183,7 @@ define i32 @test_mul_by_4(i32 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HSW-NOOPT-NEXT:    leal (,%rdi,4), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_4:
 ; JAG-NOOPT:       # BB#0:
@@ -216,7 +216,7 @@ define i32 @test_mul_by_5(i32 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_5:
 ; X64-JAG:       # BB#0:
@@ -233,7 +233,7 @@ define i32 @test_mul_by_5(i32 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_5:
 ; JAG-NOOPT:       # BB#0:
@@ -269,7 +269,7 @@ define i32 @test_mul_by_6(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    addl %edi, %edi # sched: [1:0.25]
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_6:
 ; X64-JAG:       # BB#0:
@@ -285,8 +285,8 @@ define i32 @test_mul_by_6(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_6:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $6, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $6, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_6:
 ; JAG-NOOPT:       # BB#0:
@@ -321,7 +321,7 @@ define i32 @test_mul_by_7(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_7:
 ; X64-JAG:       # BB#0:
@@ -337,8 +337,8 @@ define i32 @test_mul_by_7(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_7:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $7, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $7, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_7:
 ; JAG-NOOPT:       # BB#0:
@@ -371,7 +371,7 @@ define i32 @test_mul_by_8(i32 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_8:
 ; X64-JAG:       # BB#0:
@@ -389,7 +389,7 @@ define i32 @test_mul_by_8(i32 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HSW-NOOPT-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_8:
 ; JAG-NOOPT:       # BB#0:
@@ -422,7 +422,7 @@ define i32 @test_mul_by_9(i32 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_9:
 ; X64-JAG:       # BB#0:
@@ -439,7 +439,7 @@ define i32 @test_mul_by_9(i32 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_9:
 ; JAG-NOOPT:       # BB#0:
@@ -475,7 +475,7 @@ define i32 @test_mul_by_10(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    addl %edi, %edi # sched: [1:0.25]
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_10:
 ; X64-JAG:       # BB#0:
@@ -491,8 +491,8 @@ define i32 @test_mul_by_10(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_10:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $10, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $10, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_10:
 ; JAG-NOOPT:       # BB#0:
@@ -527,7 +527,7 @@ define i32 @test_mul_by_11(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rdi,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_11:
 ; X64-JAG:       # BB#0:
@@ -543,8 +543,8 @@ define i32 @test_mul_by_11(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_11:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $11, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $11, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_11:
 ; JAG-NOOPT:       # BB#0:
@@ -577,7 +577,7 @@ define i32 @test_mul_by_12(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    shll $2, %edi # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_12:
 ; X64-JAG:       # BB#0:
@@ -593,8 +593,8 @@ define i32 @test_mul_by_12(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_12:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $12, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $12, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_12:
 ; JAG-NOOPT:       # BB#0:
@@ -629,7 +629,7 @@ define i32 @test_mul_by_13(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_13:
 ; X64-JAG:       # BB#0:
@@ -645,8 +645,8 @@ define i32 @test_mul_by_13(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_13:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $13, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $13, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_13:
 ; JAG-NOOPT:       # BB#0:
@@ -681,7 +681,7 @@ define i32 @test_mul_by_14(i32 %x) {
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_14:
 ; X64-JAG:       # BB#0:
@@ -698,8 +698,8 @@ define i32 @test_mul_by_14(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_14:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $14, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $14, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_14:
 ; JAG-NOOPT:       # BB#0:
@@ -732,7 +732,7 @@ define i32 @test_mul_by_15(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_15:
 ; X64-JAG:       # BB#0:
@@ -748,8 +748,8 @@ define i32 @test_mul_by_15(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_15:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $15, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $15, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_15:
 ; JAG-NOOPT:       # BB#0:
@@ -782,7 +782,7 @@ define i32 @test_mul_by_16(i32 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    shll $4, %edi # sched: [1:0.50]
 ; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_16:
 ; X64-JAG:       # BB#0:
@@ -800,7 +800,7 @@ define i32 @test_mul_by_16(i32 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    shll $4, %edi # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_16:
 ; JAG-NOOPT:       # BB#0:
@@ -838,7 +838,7 @@ define i32 @test_mul_by_17(i32 %x) {
 ; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT:    shll $4, %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_17:
 ; X64-JAG:       # BB#0:
@@ -855,8 +855,8 @@ define i32 @test_mul_by_17(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_17:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $17, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $17, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_17:
 ; JAG-NOOPT:       # BB#0:
@@ -892,7 +892,7 @@ define i32 @test_mul_by_18(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    addl %edi, %edi # sched: [1:0.25]
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_18:
 ; X64-JAG:       # BB#0:
@@ -908,8 +908,8 @@ define i32 @test_mul_by_18(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_18:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $18, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $18, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_18:
 ; JAG-NOOPT:       # BB#0:
@@ -946,7 +946,7 @@ define i32 @test_mul_by_19(i32 %x) {
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    shll $2, %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_19:
 ; X64-JAG:       # BB#0:
@@ -963,8 +963,8 @@ define i32 @test_mul_by_19(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_19:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $19, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $19, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_19:
 ; JAG-NOOPT:       # BB#0:
@@ -997,7 +997,7 @@ define i32 @test_mul_by_20(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    shll $2, %edi # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_20:
 ; X64-JAG:       # BB#0:
@@ -1013,8 +1013,8 @@ define i32 @test_mul_by_20(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_20:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $20, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $20, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_20:
 ; JAG-NOOPT:       # BB#0:
@@ -1049,7 +1049,7 @@ define i32 @test_mul_by_21(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_21:
 ; X64-JAG:       # BB#0:
@@ -1065,8 +1065,8 @@ define i32 @test_mul_by_21(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_21:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $21, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $21, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_21:
 ; JAG-NOOPT:       # BB#0:
@@ -1101,7 +1101,7 @@ define i32 @test_mul_by_22(i32 %x) {
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_22:
 ; X64-JAG:       # BB#0:
@@ -1118,8 +1118,8 @@ define i32 @test_mul_by_22(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_22:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $22, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $22, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_22:
 ; JAG-NOOPT:       # BB#0:
@@ -1154,7 +1154,7 @@ define i32 @test_mul_by_23(i32 %x) {
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    shll $3, %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_23:
 ; X64-JAG:       # BB#0:
@@ -1171,8 +1171,8 @@ define i32 @test_mul_by_23(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_23:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $23, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $23, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_23:
 ; JAG-NOOPT:       # BB#0:
@@ -1205,7 +1205,7 @@ define i32 @test_mul_by_24(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    shll $3, %edi # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_24:
 ; X64-JAG:       # BB#0:
@@ -1221,8 +1221,8 @@ define i32 @test_mul_by_24(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_24:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $24, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $24, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_24:
 ; JAG-NOOPT:       # BB#0:
@@ -1257,7 +1257,7 @@ define i32 @test_mul_by_25(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_25:
 ; X64-JAG:       # BB#0:
@@ -1273,8 +1273,8 @@ define i32 @test_mul_by_25(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_25:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $25, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $25, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_25:
 ; JAG-NOOPT:       # BB#0:
@@ -1311,7 +1311,7 @@ define i32 @test_mul_by_26(i32 %x) {
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_26:
 ; X64-JAG:       # BB#0:
@@ -1328,8 +1328,8 @@ define i32 @test_mul_by_26(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_26:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $26, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $26, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_26:
 ; JAG-NOOPT:       # BB#0:
@@ -1362,7 +1362,7 @@ define i32 @test_mul_by_27(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_27:
 ; X64-JAG:       # BB#0:
@@ -1378,8 +1378,8 @@ define i32 @test_mul_by_27(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_27:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $27, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $27, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_27:
 ; JAG-NOOPT:       # BB#0:
@@ -1416,7 +1416,7 @@ define i32 @test_mul_by_28(i32 %x) {
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_28:
 ; X64-JAG:       # BB#0:
@@ -1433,8 +1433,8 @@ define i32 @test_mul_by_28(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_28:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $28, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $28, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_28:
 ; JAG-NOOPT:       # BB#0:
@@ -1471,7 +1471,7 @@ define i32 @test_mul_by_29(i32 %x) {
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
 ; X64-JAG:       # BB#0:
@@ -1489,8 +1489,8 @@ define i32 @test_mul_by_29(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_29:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $29, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $29, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_29:
 ; JAG-NOOPT:       # BB#0:
@@ -1526,7 +1526,7 @@ define i32 @test_mul_by_30(i32 %x) {
 ; X64-HSW-NEXT:    shll $5, %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_30:
 ; X64-JAG:       # BB#0:
@@ -1543,8 +1543,8 @@ define i32 @test_mul_by_30(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_30:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $30, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $30, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_30:
 ; JAG-NOOPT:       # BB#0:
@@ -1578,7 +1578,7 @@ define i32 @test_mul_by_31(i32 %x) {
 ; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT:    shll $5, %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_31:
 ; X64-JAG:       # BB#0:
@@ -1594,8 +1594,8 @@ define i32 @test_mul_by_31(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_31:
 ; HSW-NOOPT:       # BB#0:
-; HSW-NOOPT-NEXT:    imull $31, %edi, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imull $31, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_31:
 ; JAG-NOOPT:       # BB#0:
@@ -1628,7 +1628,7 @@ define i32 @test_mul_by_32(i32 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    shll $5, %edi # sched: [1:0.50]
 ; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_32:
 ; X64-JAG:       # BB#0:
@@ -1646,7 +1646,7 @@ define i32 @test_mul_by_32(i32 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    shll $5, %edi # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_32:
 ; JAG-NOOPT:       # BB#0:
@@ -1687,7 +1687,7 @@ define i32 @test_mul_spec(i32 %x) nounwi
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addl $2, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT:    imull %ecx, %eax # sched: [4:1.00]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_spec:
 ; X64-JAG:       # BB#0:
@@ -1713,7 +1713,7 @@ define i32 @test_mul_spec(i32 %x) nounwi
 ; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    addl $2, %eax # sched: [1:0.25]
 ; HSW-NOOPT-NEXT:    imull %ecx, %eax # sched: [4:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_spec:
 ; JAG-NOOPT:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll Mon Aug 28 03:04:16 2017
@@ -18,7 +18,7 @@ define i64 @test_mul_by_1(i64 %x) nounwi
 ; X64-HSW-LABEL: test_mul_by_1:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_1:
 ; X64-JAG:       # BB#0:
@@ -34,7 +34,7 @@ define i64 @test_mul_by_1(i64 %x) nounwi
 ; HSW-NOOPT-LABEL: test_mul_by_1:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.25]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_1:
 ; JAG-NOOPT:       # BB#0:
@@ -66,7 +66,7 @@ define i64 @test_mul_by_2(i64 %x) {
 ; X64-HSW-LABEL: test_mul_by_2:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_2:
 ; X64-JAG:       # BB#0:
@@ -84,7 +84,7 @@ define i64 @test_mul_by_2(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_2:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_2:
 ; JAG-NOOPT:       # BB#0:
@@ -116,7 +116,7 @@ define i64 @test_mul_by_3(i64 %x) {
 ; X64-HSW-LABEL: test_mul_by_3:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_3:
 ; X64-JAG:       # BB#0:
@@ -134,7 +134,7 @@ define i64 @test_mul_by_3(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_3:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_3:
 ; JAG-NOOPT:       # BB#0:
@@ -166,7 +166,7 @@ define i64 @test_mul_by_4(i64 %x) {
 ; X64-HSW-LABEL: test_mul_by_4:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_4:
 ; X64-JAG:       # BB#0:
@@ -184,7 +184,7 @@ define i64 @test_mul_by_4(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_4:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    leaq (,%rdi,4), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_4:
 ; JAG-NOOPT:       # BB#0:
@@ -216,7 +216,7 @@ define i64 @test_mul_by_5(i64 %x) {
 ; X64-HSW-LABEL: test_mul_by_5:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_5:
 ; X64-JAG:       # BB#0:
@@ -234,7 +234,7 @@ define i64 @test_mul_by_5(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_5:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_5:
 ; JAG-NOOPT:       # BB#0:
@@ -268,7 +268,7 @@ define i64 @test_mul_by_6(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_6:
 ; X64-JAG:       # BB#0:
@@ -287,7 +287,7 @@ define i64 @test_mul_by_6(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_6:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $6, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_6:
 ; JAG-NOOPT:       # BB#0:
@@ -323,7 +323,7 @@ define i64 @test_mul_by_7(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_7:
 ; X64-JAG:       # BB#0:
@@ -342,7 +342,7 @@ define i64 @test_mul_by_7(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_7:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $7, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_7:
 ; JAG-NOOPT:       # BB#0:
@@ -375,7 +375,7 @@ define i64 @test_mul_by_8(i64 %x) {
 ; X64-HSW-LABEL: test_mul_by_8:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_8:
 ; X64-JAG:       # BB#0:
@@ -393,7 +393,7 @@ define i64 @test_mul_by_8(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_8:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_8:
 ; JAG-NOOPT:       # BB#0:
@@ -425,7 +425,7 @@ define i64 @test_mul_by_9(i64 %x) {
 ; X64-HSW-LABEL: test_mul_by_9:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_9:
 ; X64-JAG:       # BB#0:
@@ -443,7 +443,7 @@ define i64 @test_mul_by_9(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_9:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_9:
 ; JAG-NOOPT:       # BB#0:
@@ -477,7 +477,7 @@ define i64 @test_mul_by_10(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_10:
 ; X64-JAG:       # BB#0:
@@ -496,7 +496,7 @@ define i64 @test_mul_by_10(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_10:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $10, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_10:
 ; JAG-NOOPT:       # BB#0:
@@ -532,7 +532,7 @@ define i64 @test_mul_by_11(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_11:
 ; X64-JAG:       # BB#0:
@@ -551,7 +551,7 @@ define i64 @test_mul_by_11(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_11:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $11, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_11:
 ; JAG-NOOPT:       # BB#0:
@@ -585,7 +585,7 @@ define i64 @test_mul_by_12(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    shlq $2, %rdi # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_12:
 ; X64-JAG:       # BB#0:
@@ -604,7 +604,7 @@ define i64 @test_mul_by_12(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_12:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $12, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_12:
 ; JAG-NOOPT:       # BB#0:
@@ -640,7 +640,7 @@ define i64 @test_mul_by_13(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_13:
 ; X64-JAG:       # BB#0:
@@ -659,7 +659,7 @@ define i64 @test_mul_by_13(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_13:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $13, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_13:
 ; JAG-NOOPT:       # BB#0:
@@ -696,7 +696,7 @@ define i64 @test_mul_by_14(i64 %x) {
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_14:
 ; X64-JAG:       # BB#0:
@@ -716,7 +716,7 @@ define i64 @test_mul_by_14(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_14:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $14, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_14:
 ; JAG-NOOPT:       # BB#0:
@@ -751,7 +751,7 @@ define i64 @test_mul_by_15(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_15:
 ; X64-JAG:       # BB#0:
@@ -770,7 +770,7 @@ define i64 @test_mul_by_15(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_15:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $15, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_15:
 ; JAG-NOOPT:       # BB#0:
@@ -804,7 +804,7 @@ define i64 @test_mul_by_16(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    shlq $4, %rdi # sched: [1:0.50]
 ; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_16:
 ; X64-JAG:       # BB#0:
@@ -824,7 +824,7 @@ define i64 @test_mul_by_16(i64 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    shlq $4, %rdi # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.25]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_16:
 ; JAG-NOOPT:       # BB#0:
@@ -864,7 +864,7 @@ define i64 @test_mul_by_17(i64 %x) {
 ; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; X64-HSW-NEXT:    shlq $4, %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_17:
 ; X64-JAG:       # BB#0:
@@ -884,7 +884,7 @@ define i64 @test_mul_by_17(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_17:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $17, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_17:
 ; JAG-NOOPT:       # BB#0:
@@ -920,7 +920,7 @@ define i64 @test_mul_by_18(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_18:
 ; X64-JAG:       # BB#0:
@@ -939,7 +939,7 @@ define i64 @test_mul_by_18(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_18:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $18, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_18:
 ; JAG-NOOPT:       # BB#0:
@@ -977,7 +977,7 @@ define i64 @test_mul_by_19(i64 %x) {
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    shlq $2, %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_19:
 ; X64-JAG:       # BB#0:
@@ -997,7 +997,7 @@ define i64 @test_mul_by_19(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_19:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $19, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_19:
 ; JAG-NOOPT:       # BB#0:
@@ -1031,7 +1031,7 @@ define i64 @test_mul_by_20(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    shlq $2, %rdi # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_20:
 ; X64-JAG:       # BB#0:
@@ -1050,7 +1050,7 @@ define i64 @test_mul_by_20(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_20:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $20, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_20:
 ; JAG-NOOPT:       # BB#0:
@@ -1086,7 +1086,7 @@ define i64 @test_mul_by_21(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_21:
 ; X64-JAG:       # BB#0:
@@ -1105,7 +1105,7 @@ define i64 @test_mul_by_21(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_21:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $21, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_21:
 ; JAG-NOOPT:       # BB#0:
@@ -1142,7 +1142,7 @@ define i64 @test_mul_by_22(i64 %x) {
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_22:
 ; X64-JAG:       # BB#0:
@@ -1162,7 +1162,7 @@ define i64 @test_mul_by_22(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_22:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $22, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_22:
 ; JAG-NOOPT:       # BB#0:
@@ -1199,7 +1199,7 @@ define i64 @test_mul_by_23(i64 %x) {
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    shlq $3, %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_23:
 ; X64-JAG:       # BB#0:
@@ -1219,7 +1219,7 @@ define i64 @test_mul_by_23(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_23:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $23, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_23:
 ; JAG-NOOPT:       # BB#0:
@@ -1253,7 +1253,7 @@ define i64 @test_mul_by_24(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    shlq $3, %rdi # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_24:
 ; X64-JAG:       # BB#0:
@@ -1272,7 +1272,7 @@ define i64 @test_mul_by_24(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_24:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $24, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_24:
 ; JAG-NOOPT:       # BB#0:
@@ -1308,7 +1308,7 @@ define i64 @test_mul_by_25(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rax,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_25:
 ; X64-JAG:       # BB#0:
@@ -1327,7 +1327,7 @@ define i64 @test_mul_by_25(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_25:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $25, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_25:
 ; JAG-NOOPT:       # BB#0:
@@ -1365,7 +1365,7 @@ define i64 @test_mul_by_26(i64 %x) {
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_26:
 ; X64-JAG:       # BB#0:
@@ -1385,7 +1385,7 @@ define i64 @test_mul_by_26(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_26:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $26, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_26:
 ; JAG-NOOPT:       # BB#0:
@@ -1420,7 +1420,7 @@ define i64 @test_mul_by_27(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_27:
 ; X64-JAG:       # BB#0:
@@ -1439,7 +1439,7 @@ define i64 @test_mul_by_27(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_27:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $27, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_27:
 ; JAG-NOOPT:       # BB#0:
@@ -1477,7 +1477,7 @@ define i64 @test_mul_by_28(i64 %x) {
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_28:
 ; X64-JAG:       # BB#0:
@@ -1497,7 +1497,7 @@ define i64 @test_mul_by_28(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_28:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $28, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_28:
 ; JAG-NOOPT:       # BB#0:
@@ -1536,7 +1536,7 @@ define i64 @test_mul_by_29(i64 %x) {
 ; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
 ; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
 ; X64-JAG:       # BB#0:
@@ -1557,7 +1557,7 @@ define i64 @test_mul_by_29(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_29:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $29, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_29:
 ; JAG-NOOPT:       # BB#0:
@@ -1596,7 +1596,7 @@ define i64 @test_mul_by_30(i64 %x) {
 ; X64-HSW-NEXT:    shlq $5, %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
 ; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_30:
 ; X64-JAG:       # BB#0:
@@ -1617,7 +1617,7 @@ define i64 @test_mul_by_30(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_30:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $30, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_30:
 ; JAG-NOOPT:       # BB#0:
@@ -1654,7 +1654,7 @@ define i64 @test_mul_by_31(i64 %x) {
 ; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; X64-HSW-NEXT:    shlq $5, %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_31:
 ; X64-JAG:       # BB#0:
@@ -1674,7 +1674,7 @@ define i64 @test_mul_by_31(i64 %x) {
 ; HSW-NOOPT-LABEL: test_mul_by_31:
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    imulq $31, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_31:
 ; JAG-NOOPT:       # BB#0:
@@ -1709,7 +1709,7 @@ define i64 @test_mul_by_32(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    shlq $5, %rdi # sched: [1:0.50]
 ; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_32:
 ; X64-JAG:       # BB#0:
@@ -1729,7 +1729,7 @@ define i64 @test_mul_by_32(i64 %x) {
 ; HSW-NOOPT:       # BB#0:
 ; HSW-NOOPT-NEXT:    shlq $5, %rdi # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.25]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_32:
 ; JAG-NOOPT:       # BB#0:
@@ -1792,8 +1792,8 @@ define i64 @test_mul_spec(i64 %x) nounwi
 ; X64-HSW-NEXT:    addq $42, %rcx # sched: [1:0.25]
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    addq $2, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    imulq %rcx, %rax # sched: [3:1.00]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    imulq %rcx, %rax # sched: [4:1.00]
+; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_spec:
 ; X64-JAG:       # BB#0:
@@ -1840,8 +1840,8 @@ define i64 @test_mul_spec(i64 %x) nounwi
 ; HSW-NOOPT-NEXT:    addq $42, %rcx # sched: [1:0.25]
 ; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    addq $2, %rax # sched: [1:0.25]
-; HSW-NOOPT-NEXT:    imulq %rcx, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+; HSW-NOOPT-NEXT:    imulq %rcx, %rax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [2:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_spec:
 ; JAG-NOOPT:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll Mon Aug 28 03:04:16 2017
@@ -37,11 +37,11 @@ define i16 @test_ctpop_i16(i16 zeroext %
 ;
 ; HASWELL-LABEL: test_ctpop_i16:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    popcntw (%rsi), %cx # sched: [7:1.00]
+; HASWELL-NEXT:    popcntw (%rsi), %cx # sched: [3:1.00]
 ; HASWELL-NEXT:    popcntw %di, %ax # sched: [3:1.00]
 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctpop_i16:
 ; BTVER2:       # BB#0:
@@ -90,10 +90,10 @@ define i32 @test_ctpop_i32(i32 %a0, i32
 ;
 ; HASWELL-LABEL: test_ctpop_i32:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    popcntl (%rsi), %ecx # sched: [7:1.00]
+; HASWELL-NEXT:    popcntl (%rsi), %ecx # sched: [3:1.00]
 ; HASWELL-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctpop_i32:
 ; BTVER2:       # BB#0:
@@ -140,10 +140,10 @@ define i64 @test_ctpop_i64(i64 %a0, i64
 ;
 ; HASWELL-LABEL: test_ctpop_i64:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    popcntq (%rsi), %rcx # sched: [7:1.00]
+; HASWELL-NEXT:    popcntq (%rsi), %rcx # sched: [3:1.00]
 ; HASWELL-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
 ; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ctpop_i64:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/pr32329.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32329.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32329.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32329.ll Mon Aug 28 03:04:16 2017
@@ -36,33 +36,33 @@ define void @foo() local_unnamed_addr {
 ; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:  .Lcfi7:
 ; X86-NEXT:    .cfi_offset %ebp, -8
-; X86-NEXT:    movl obj, %edx
 ; X86-NEXT:    movsbl var_27, %eax
-; X86-NEXT:    movzwl var_2, %esi
 ; X86-NEXT:    movl var_310, %ecx
 ; X86-NEXT:    imull %eax, %ecx
+; X86-NEXT:    movl obj, %esi
 ; X86-NEXT:    addl var_24, %ecx
-; X86-NEXT:    andl $4194303, %edx # imm = 0x3FFFFF
-; X86-NEXT:    leal (%edx,%edx), %ebx
-; X86-NEXT:    subl %eax, %ebx
-; X86-NEXT:    movl %ebx, %edi
-; X86-NEXT:    subl %esi, %edi
-; X86-NEXT:    imull %edi, %ecx
+; X86-NEXT:    movzwl var_2, %edi
+; X86-NEXT:    andl $4194303, %esi # imm = 0x3FFFFF
+; X86-NEXT:    leal (%esi,%esi), %edx
+; X86-NEXT:    subl %eax, %edx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    subl %edi, %ebx
+; X86-NEXT:    imull %ebx, %ecx
 ; X86-NEXT:    addl $-1437483407, %ecx # imm = 0xAA51BE71
-; X86-NEXT:    movl $9, %esi
+; X86-NEXT:    movl $9, %edi
 ; X86-NEXT:    xorl %ebp, %ebp
-; X86-NEXT:    shldl %cl, %esi, %ebp
-; X86-NEXT:    shlxl %ecx, %esi, %esi
+; X86-NEXT:    shldl %cl, %edi, %ebp
+; X86-NEXT:    shlxl %ecx, %edi, %edi
 ; X86-NEXT:    testb $32, %cl
-; X86-NEXT:    cmovnel %esi, %ebp
+; X86-NEXT:    cmovnel %edi, %ebp
 ; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    cmovnel %ecx, %esi
-; X86-NEXT:    cmpl %edx, %edi
+; X86-NEXT:    cmovnel %ecx, %edi
 ; X86-NEXT:    movl %ebp, var_50+4
-; X86-NEXT:    movl %esi, var_50
+; X86-NEXT:    cmpl %esi, %ebx
 ; X86-NEXT:    setge var_205
-; X86-NEXT:    imull %eax, %ebx
-; X86-NEXT:    movb %bl, var_218
+; X86-NEXT:    imull %eax, %edx
+; X86-NEXT:    movl %edi, var_50
+; X86-NEXT:    movb %dl, var_218
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
@@ -71,25 +71,25 @@ define void @foo() local_unnamed_addr {
 ;
 ; X64-LABEL: foo:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    movl {{.*}}(%rip), %eax
-; X64-NEXT:    movsbl {{.*}}(%rip), %r9d
+; X64-NEXT:    movsbl {{.*}}(%rip), %eax
+; X64-NEXT:    movl {{.*}}(%rip), %ecx
+; X64-NEXT:    imull %eax, %ecx
+; X64-NEXT:    movl {{.*}}(%rip), %edx
+; X64-NEXT:    addl {{.*}}(%rip), %ecx
 ; X64-NEXT:    movzwl {{.*}}(%rip), %r8d
-; X64-NEXT:    movl {{.*}}(%rip), %esi
-; X64-NEXT:    imull %r9d, %esi
-; X64-NEXT:    addl {{.*}}(%rip), %esi
-; X64-NEXT:    andl $4194303, %eax # imm = 0x3FFFFF
-; X64-NEXT:    leal (%rax,%rax), %edi
-; X64-NEXT:    subl %r9d, %edi
-; X64-NEXT:    movl %edi, %edx
-; X64-NEXT:    subl %r8d, %edx
-; X64-NEXT:    imull %edx, %esi
-; X64-NEXT:    addl $-1437483407, %esi # imm = 0xAA51BE71
-; X64-NEXT:    movl $9, %ecx
-; X64-NEXT:    shlxq %rsi, %rcx, %rcx
-; X64-NEXT:    movq %rcx, {{.*}}(%rip)
-; X64-NEXT:    cmpl %eax, %edx
+; X64-NEXT:    andl $4194303, %edx # imm = 0x3FFFFF
+; X64-NEXT:    leal (%rdx,%rdx), %edi
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    movl %edi, %esi
+; X64-NEXT:    subl %r8d, %esi
+; X64-NEXT:    imull %esi, %ecx
+; X64-NEXT:    addl $-1437483407, %ecx # imm = 0xAA51BE71
+; X64-NEXT:    movl $9, %r8d
+; X64-NEXT:    cmpl %edx, %esi
 ; X64-NEXT:    setge {{.*}}(%rip)
-; X64-NEXT:    imull %r9d, %edi
+; X64-NEXT:    shlxq %rcx, %r8, %rcx
+; X64-NEXT:    imull %eax, %edi
+; X64-NEXT:    movq %rcx, {{.*}}(%rip)
 ; X64-NEXT:    movb %dil, {{.*}}(%rip)
 ; X64-NEXT:    retq
   entry:

Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Mon Aug 28 03:04:16 2017
@@ -51,9 +51,9 @@ define float @f32_no_estimate(float %x)
 ;
 ; HASWELL-LABEL: f32_no_estimate:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
-; HASWELL-NEXT:    vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; HASWELL-NEXT:    vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: f32_no_estimate:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -63,9 +63,9 @@ define float @f32_no_estimate(float %x)
 ;
 ; AVX512-LABEL: f32_no_estimate:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
-; AVX512-NEXT:    vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; AVX512-NEXT:    vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast float 1.0, %x
   ret float %div
 }
@@ -122,9 +122,9 @@ define float @f32_one_step(float %x) #1
 ; HASWELL-LABEL: f32_one_step:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
-; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: f32_one_step:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -139,9 +139,9 @@ define float @f32_one_step(float %x) #1
 ; AVX512-LABEL: f32_one_step:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
-; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast float 1.0, %x
   ret float %div
 }
@@ -220,13 +220,13 @@ define float @f32_two_step(float %x) #2
 ; HASWELL-LABEL: f32_two_step:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm3
-; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm3
-; HASWELL-NEXT:    vfnmadd213ss %xmm2, %xmm3, %xmm0
-; HASWELL-NEXT:    vfmadd132ss %xmm3, %xmm3, %xmm0
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: f32_two_step:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -245,13 +245,13 @@ define float @f32_two_step(float %x) #2
 ; AVX512-LABEL: f32_two_step:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
 ; AVX512-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm3
-; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm3
-; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm3, %xmm0
-; AVX512-NEXT:    vfmadd132ss %xmm3, %xmm3, %xmm0
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast float 1.0, %x
   ret float %div
 }
@@ -290,9 +290,9 @@ define <4 x float> @v4f32_no_estimate(<4
 ;
 ; HASWELL-LABEL: v4f32_no_estimate:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
-; HASWELL-NEXT:    vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [1:0.50]
+; HASWELL-NEXT:    vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -302,9 +302,9 @@ define <4 x float> @v4f32_no_estimate(<4
 ;
 ; AVX512-LABEL: v4f32_no_estimate:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
-; AVX512-NEXT:    vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [1:0.50]
+; AVX512-NEXT:    vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <4 x float> %div
 }
@@ -361,10 +361,10 @@ define <4 x float> @v4f32_one_step(<4 x
 ; HASWELL-LABEL: v4f32_one_step:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
-; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
-; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v4f32_one_step:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -379,17 +379,17 @@ define <4 x float> @v4f32_one_step(<4 x
 ; KNL-LABEL: v4f32_one_step:
 ; KNL:       # BB#0:
 ; KNL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
-; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
-; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
+; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v4f32_one_step:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %xmm0, %xmm1
 ; SKX-NEXT:    vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
-; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <4 x float> %div
 }
@@ -468,13 +468,13 @@ define <4 x float> @v4f32_two_step(<4 x
 ; HASWELL-LABEL: v4f32_two_step:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
 ; HASWELL-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3
-; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3
-; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0
-; HASWELL-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v4f32_two_step:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -493,24 +493,24 @@ define <4 x float> @v4f32_two_step(<4 x
 ; KNL-LABEL: v4f32_two_step:
 ; KNL:       # BB#0:
 ; KNL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
+; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
 ; KNL-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3
-; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3
-; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0
-; KNL-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v4f32_two_step:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %xmm0, %xmm1
-; SKX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
+; SKX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
 ; SKX-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; SKX-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3
-; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3
-; SKX-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0
-; SKX-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; SKX-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <4 x float> %div
 }
@@ -552,9 +552,9 @@ define <8 x float> @v8f32_no_estimate(<8
 ;
 ; HASWELL-LABEL: v8f32_no_estimate:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
-; HASWELL-NEXT:    vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
+; HASWELL-NEXT:    vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -564,9 +564,9 @@ define <8 x float> @v8f32_no_estimate(<8
 ;
 ; AVX512-LABEL: v8f32_no_estimate:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
-; AVX512-NEXT:    vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
+; AVX512-NEXT:    vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <8 x float> %div
 }
@@ -629,11 +629,11 @@ define <8 x float> @v8f32_one_step(<8 x
 ;
 ; HASWELL-LABEL: v8f32_one_step:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
-; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
-; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v8f32_one_step:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -647,18 +647,18 @@ define <8 x float> @v8f32_one_step(<8 x
 ;
 ; KNL-LABEL: v8f32_one_step:
 ; KNL:       # BB#0:
-; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
-; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
-; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
+; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v8f32_one_step:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %ymm0, %ymm1
 ; SKX-NEXT:    vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
-; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <8 x float> %div
 }
@@ -749,14 +749,14 @@ define <8 x float> @v8f32_two_step(<8 x
 ;
 ; HASWELL-LABEL: v8f32_two_step:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; HASWELL-NEXT:    vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3
-; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3
-; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0
-; HASWELL-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v8f32_two_step:
 ; HASWELL-NO-FMA:       # BB#0:
@@ -774,25 +774,25 @@ define <8 x float> @v8f32_two_step(<8 x
 ;
 ; KNL-LABEL: v8f32_two_step:
 ; KNL:       # BB#0:
-; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; KNL-NEXT:    vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3
-; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3
-; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0
-; KNL-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
+; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v8f32_two_step:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %ymm0, %ymm1
-; SKX-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; SKX-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; SKX-NEXT:    vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; SKX-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3
-; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3
-; SKX-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0
-; SKX-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
+; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
+; SKX-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <8 x float> %div
 }

Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Mon Aug 28 03:04:16 2017
@@ -45,20 +45,20 @@ define float @f32_no_step_2(float %x) #3
 ; HASWELL-LABEL: f32_no_step_2:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: f32_no_step_2:
 ; HASWELL-NO-FMA:       # BB#0:
 ; HASWELL-NO-FMA-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; AVX512-LABEL: f32_no_step_2:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast float 1234.0, %x
   ret float %div
 }
@@ -120,29 +120,29 @@ define float @f32_one_step_2(float %x) #
 ; HASWELL-LABEL: f32_one_step_2:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
-; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: f32_one_step_2:
 ; HASWELL-NO-FMA:       # BB#0:
 ; HASWELL-NO-FMA-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; AVX512-LABEL: f32_one_step_2:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
-; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0
-; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast float 3456.0, %x
   ret float %div
 }
@@ -209,32 +209,32 @@ define float @f32_one_step_2_divs(float
 ; HASWELL-LABEL: f32_one_step_2_divs:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
-; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; HASWELL-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
 ; HASWELL-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs:
 ; HASWELL-NO-FMA:       # BB#0:
 ; HASWELL-NO-FMA-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; AVX512-LABEL: f32_one_step_2_divs:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
-; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0
-; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; AVX512-NEXT:    vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
 ; AVX512-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast float 3456.0, %x
   %div2 = fdiv fast float %div, %x
   ret float %div2
@@ -319,20 +319,20 @@ define float @f32_two_step_2(float %x) #
 ; HASWELL-LABEL: f32_two_step_2:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm3
-; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm3
-; HASWELL-NEXT:    vfnmadd213ss %xmm2, %xmm3, %xmm0
-; HASWELL-NEXT:    vfmadd132ss %xmm3, %xmm3, %xmm0
-; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: f32_two_step_2:
 ; HASWELL-NO-FMA:       # BB#0:
 ; HASWELL-NO-FMA-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -340,20 +340,20 @@ define float @f32_two_step_2(float %x) #
 ; HASWELL-NO-FMA-NEXT:    vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; AVX512-LABEL: f32_two_step_2:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
 ; AVX512-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm3
-; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm3
-; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm3, %xmm0
-; AVX512-NEXT:    vfmadd132ss %xmm3, %xmm3, %xmm0
-; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; AVX512-NEXT:    retq # sched: [1:1.00]
+; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; AVX512-NEXT:    vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast float 6789.0, %x
   ret float %div
 }
@@ -415,39 +415,39 @@ define <4 x float> @v4f32_one_step2(<4 x
 ; HASWELL-LABEL: v4f32_one_step2:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
-; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
-; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v4f32_one_step2:
 ; HASWELL-NO-FMA:       # BB#0:
 ; HASWELL-NO-FMA-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; KNL-LABEL: v4f32_one_step2:
 ; KNL:       # BB#0:
 ; KNL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
-; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
-; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; KNL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
+; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v4f32_one_step2:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %xmm0, %xmm1
 ; SKX-NEXT:    vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
-; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; SKX-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
   ret <4 x float> %div
 }
@@ -514,43 +514,43 @@ define <4 x float> @v4f32_one_step_2_div
 ; HASWELL-LABEL: v4f32_one_step_2_divs:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
-; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
-; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
 ; HASWELL-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs:
 ; HASWELL-NO-FMA:       # BB#0:
 ; HASWELL-NO-FMA-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; KNL-LABEL: v4f32_one_step_2_divs:
 ; KNL:       # BB#0:
 ; KNL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
-; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
-; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; KNL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
+; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
 ; KNL-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v4f32_one_step_2_divs:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %xmm0, %xmm1
 ; SKX-NEXT:    vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
-; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0
-; SKX-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
 ; SKX-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
   %div2 = fdiv fast <4 x float> %div, %x
   ret <4 x float> %div2
@@ -635,20 +635,20 @@ define <4 x float> @v4f32_two_step2(<4 x
 ; HASWELL-LABEL: v4f32_two_step2:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
 ; HASWELL-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3
-; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3
-; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0
-; HASWELL-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0
-; HASWELL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v4f32_two_step2:
 ; HASWELL-NO-FMA:       # BB#0:
 ; HASWELL-NO-FMA-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -656,32 +656,32 @@ define <4 x float> @v4f32_two_step2(<4 x
 ; HASWELL-NO-FMA-NEXT:    vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
 ; HASWELL-NO-FMA-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; KNL-LABEL: v4f32_two_step2:
 ; KNL:       # BB#0:
 ; KNL-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
+; KNL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
 ; KNL-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3
-; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3
-; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0
-; KNL-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0
-; KNL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; KNL-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v4f32_two_step2:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %xmm0, %xmm1
-; SKX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
+; SKX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
 ; SKX-NEXT:    vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; SKX-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3
-; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3
-; SKX-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0
-; SKX-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0
-; SKX-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
+; SKX-NEXT:    vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
+; SKX-NEXT:    vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
   ret <4 x float> %div
 }
@@ -750,40 +750,40 @@ define <8 x float> @v8f32_one_step2(<8 x
 ;
 ; HASWELL-LABEL: v8f32_one_step2:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
-; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
-; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; HASWELL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v8f32_one_step2:
 ; HASWELL-NO-FMA:       # BB#0:
-; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; KNL-LABEL: v8f32_one_step2:
 ; KNL:       # BB#0:
-; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
-; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
-; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; KNL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
+; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v8f32_one_step2:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %ymm0, %ymm1
 ; SKX-NEXT:    vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
-; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; SKX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
   ret <8 x float> %div
 }
@@ -858,44 +858,44 @@ define <8 x float> @v8f32_one_step_2_div
 ;
 ; HASWELL-LABEL: v8f32_one_step_2_divs:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
-; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
-; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; HASWELL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
-; HASWELL-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs:
 ; HASWELL-NO-FMA:       # BB#0:
-; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; KNL-LABEL: v8f32_one_step_2_divs:
 ; KNL:       # BB#0:
-; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
-; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
-; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; KNL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
-; KNL-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
+; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
+; KNL-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v8f32_one_step_2_divs:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %ymm0, %ymm1
 ; SKX-NEXT:    vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
-; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0
-; SKX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
-; SKX-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
+; SKX-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
   %div2 = fdiv fast <8 x float> %div, %x
   ret <8 x float> %div2
@@ -993,54 +993,54 @@ define <8 x float> @v8f32_two_step2(<8 x
 ;
 ; HASWELL-LABEL: v8f32_two_step2:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; HASWELL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; HASWELL-NEXT:    vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3
-; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3
-; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0
-; HASWELL-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0
-; HASWELL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
+; HASWELL-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v8f32_two_step2:
 ; HASWELL-NO-FMA:       # BB#0:
-; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
 ; HASWELL-NO-FMA-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; KNL-LABEL: v8f32_two_step2:
 ; KNL:       # BB#0:
-; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; KNL-NEXT:    vrcpps %ymm0, %ymm1 # sched: [11:2.00]
+; KNL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; KNL-NEXT:    vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3
-; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3
-; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0
-; KNL-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0
-; KNL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
+; KNL-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v8f32_two_step2:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %ymm0, %ymm1
-; SKX-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; SKX-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
 ; SKX-NEXT:    vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; SKX-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3
-; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3
-; SKX-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0
-; SKX-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0
-; SKX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
+; SKX-NEXT:    vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
+; SKX-NEXT:    vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
   ret <8 x float> %div
 }
@@ -1074,23 +1074,23 @@ define <8 x float> @v8f32_no_step(<8 x f
 ;
 ; HASWELL-LABEL: v8f32_no_step:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [11:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v8f32_no_step:
 ; HASWELL-NO-FMA:       # BB#0:
-; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm0 # sched: [11:2.00]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; KNL-LABEL: v8f32_no_step:
 ; KNL:       # BB#0:
-; KNL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [11:2.00]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v8f32_no_step:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %ymm0, %ymm0
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <8 x float> %div
 }
@@ -1130,27 +1130,27 @@ define <8 x float> @v8f32_no_step2(<8 x
 ;
 ; HASWELL-LABEL: v8f32_no_step2:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; HASWELL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [11:2.00]
+; HASWELL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; HASWELL-NO-FMA-LABEL: v8f32_no_step2:
 ; HASWELL-NO-FMA:       # BB#0:
-; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; HASWELL-NO-FMA-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NO-FMA-NEXT:    vrcpps %ymm0, %ymm0 # sched: [11:2.00]
+; HASWELL-NO-FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT:    retq # sched: [2:1.00]
 ;
 ; KNL-LABEL: v8f32_no_step2:
 ; KNL:       # BB#0:
-; KNL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; KNL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; KNL-NEXT:    retq # sched: [1:1.00]
+; KNL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [11:2.00]
+; KNL-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; KNL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKX-LABEL: v8f32_no_step2:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vrcp14ps %ymm0, %ymm0
-; SKX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SKX-NEXT:    retq # sched: [1:1.00]
+; SKX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
+; SKX-NEXT:    retq # sched: [2:1.00]
   %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
   ret <8 x float> %div
 }

Modified: llvm/trunk/test/CodeGen/X86/sha-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sha-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sha-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sha-schedule.ll Mon Aug 28 03:04:16 2017
@@ -25,7 +25,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32
 ; CANNONLAKE:       # BB#0:
 ; CANNONLAKE-NEXT:    sha1msg1 %xmm1, %xmm0
 ; CANNONLAKE-NEXT:    sha1msg1 (%rdi), %xmm0
-; CANNONLAKE-NEXT:    retq # sched: [1:1.00]
+; CANNONLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sha1msg1:
 ; ZNVER1:       # BB#0:
@@ -56,7 +56,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32
 ; CANNONLAKE:       # BB#0:
 ; CANNONLAKE-NEXT:    sha1msg2 %xmm1, %xmm0
 ; CANNONLAKE-NEXT:    sha1msg2 (%rdi), %xmm0
-; CANNONLAKE-NEXT:    retq # sched: [1:1.00]
+; CANNONLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sha1msg2:
 ; ZNVER1:       # BB#0:
@@ -87,7 +87,7 @@ define <4 x i32> @test_sha1nexte(<4 x i3
 ; CANNONLAKE:       # BB#0:
 ; CANNONLAKE-NEXT:    sha1nexte %xmm1, %xmm0
 ; CANNONLAKE-NEXT:    sha1nexte (%rdi), %xmm0
-; CANNONLAKE-NEXT:    retq # sched: [1:1.00]
+; CANNONLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sha1nexte:
 ; ZNVER1:       # BB#0:
@@ -118,7 +118,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i3
 ; CANNONLAKE:       # BB#0:
 ; CANNONLAKE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
 ; CANNONLAKE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
-; CANNONLAKE-NEXT:    retq # sched: [1:1.00]
+; CANNONLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sha1rnds4:
 ; ZNVER1:       # BB#0:
@@ -153,7 +153,7 @@ define <4 x i32> @test_sha256msg1(<4 x i
 ; CANNONLAKE:       # BB#0:
 ; CANNONLAKE-NEXT:    sha256msg1 %xmm1, %xmm0
 ; CANNONLAKE-NEXT:    sha256msg1 (%rdi), %xmm0
-; CANNONLAKE-NEXT:    retq # sched: [1:1.00]
+; CANNONLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sha256msg1:
 ; ZNVER1:       # BB#0:
@@ -184,7 +184,7 @@ define <4 x i32> @test_sha256msg2(<4 x i
 ; CANNONLAKE:       # BB#0:
 ; CANNONLAKE-NEXT:    sha256msg2 %xmm1, %xmm0
 ; CANNONLAKE-NEXT:    sha256msg2 (%rdi), %xmm0
-; CANNONLAKE-NEXT:    retq # sched: [1:1.00]
+; CANNONLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sha256msg2:
 ; ZNVER1:       # BB#0:
@@ -224,7 +224,7 @@ define <4 x i32> @test_sha256rnds2(<4 x
 ; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
 ; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3
 ; CANNONLAKE-NEXT:    vmovaps %xmm3, %xmm0 # sched: [1:1.00]
-; CANNONLAKE-NEXT:    retq # sched: [1:1.00]
+; CANNONLAKE-NEXT:    retq # sched: [2:1.00]
 ;
 ; ZNVER1-LABEL: test_sha256rnds2:
 ; ZNVER1:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Mon Aug 28 03:04:16 2017
@@ -37,8 +37,8 @@ define <4 x float> @test_addps(<4 x floa
 ; HASWELL-LABEL: test_addps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_addps:
 ; BTVER2:       # BB#0:
@@ -85,8 +85,8 @@ define float @test_addss(float %a0, floa
 ; HASWELL-LABEL: test_addss:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_addss:
 ; BTVER2:       # BB#0:
@@ -137,8 +137,8 @@ define <4 x float> @test_andps(<4 x floa
 ; HASWELL-LABEL: test_andps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_andps:
 ; BTVER2:       # BB#0:
@@ -193,8 +193,8 @@ define <4 x float> @test_andnotps(<4 x f
 ; HASWELL-LABEL: test_andnotps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_andnotps:
 ; BTVER2:       # BB#0:
@@ -251,9 +251,9 @@ define <4 x float> @test_cmpps(<4 x floa
 ; HASWELL-LABEL: test_cmpps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cmpps:
 ; BTVER2:       # BB#0:
@@ -306,7 +306,7 @@ define float @test_cmpss(float %a0, floa
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cmpss:
 ; BTVER2:       # BB#0:
@@ -399,7 +399,7 @@ define i32 @test_comiss(<4 x float> %a0,
 ; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
 ; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_comiss:
 ; BTVER2:       # BB#0:
@@ -470,7 +470,7 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ; HASWELL-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
 ; HASWELL-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtsi2ss:
 ; BTVER2:       # BB#0:
@@ -523,10 +523,10 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ;
 ; HASWELL-LABEL: test_cvtsi2ssq:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
 ; HASWELL-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtsi2ssq:
 ; BTVER2:       # BB#0:
@@ -580,9 +580,9 @@ define i32 @test_cvtss2si(float %a0, flo
 ; HASWELL-LABEL: test_cvtss2si:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtss2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtss2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT:    vcvtss2si (%rdi), %eax # sched: [4:1.00]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtss2si:
 ; BTVER2:       # BB#0:
@@ -639,9 +639,9 @@ define i64 @test_cvtss2siq(float %a0, fl
 ; HASWELL-LABEL: test_cvtss2siq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtss2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtss2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT:    vcvtss2si (%rdi), %rax # sched: [4:1.00]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtss2siq:
 ; BTVER2:       # BB#0:
@@ -698,9 +698,9 @@ define i32 @test_cvttss2si(float %a0, fl
 ; HASWELL-LABEL: test_cvttss2si:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvttss2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvttss2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT:    vcvttss2si (%rdi), %eax # sched: [4:1.00]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvttss2si:
 ; BTVER2:       # BB#0:
@@ -754,9 +754,9 @@ define i64 @test_cvttss2siq(float %a0, f
 ; HASWELL-LABEL: test_cvttss2siq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvttss2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvttss2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT:    vcvttss2si (%rdi), %rax # sched: [4:1.00]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvttss2siq:
 ; BTVER2:       # BB#0:
@@ -805,9 +805,9 @@ define <4 x float> @test_divps(<4 x floa
 ;
 ; HASWELL-LABEL: test_divps:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; HASWELL-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_divps:
 ; BTVER2:       # BB#0:
@@ -853,9 +853,9 @@ define float @test_divss(float %a0, floa
 ;
 ; HASWELL-LABEL: test_divss:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; HASWELL-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_divss:
 ; BTVER2:       # BB#0:
@@ -902,8 +902,8 @@ define void @test_ldmxcsr(i32 %a0) {
 ; HASWELL-LABEL: test_ldmxcsr:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; HASWELL-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [6:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ldmxcsr:
 ; BTVER2:       # BB#0:
@@ -952,8 +952,8 @@ define <4 x float> @test_maxps(<4 x floa
 ; HASWELL-LABEL: test_maxps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_maxps:
 ; BTVER2:       # BB#0:
@@ -1001,8 +1001,8 @@ define <4 x float> @test_maxss(<4 x floa
 ; HASWELL-LABEL: test_maxss:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_maxss:
 ; BTVER2:       # BB#0:
@@ -1050,8 +1050,8 @@ define <4 x float> @test_minps(<4 x floa
 ; HASWELL-LABEL: test_minps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_minps:
 ; BTVER2:       # BB#0:
@@ -1099,8 +1099,8 @@ define <4 x float> @test_minss(<4 x floa
 ; HASWELL-LABEL: test_minss:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_minss:
 ; BTVER2:       # BB#0:
@@ -1151,10 +1151,10 @@ define void @test_movaps(<4 x float> *%a
 ;
 ; HASWELL-LABEL: test_movaps:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovaps (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT:    vmovaps (%rdi), %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movaps:
 ; BTVER2:       # BB#0:
@@ -1207,7 +1207,7 @@ define <4 x float> @test_movhlps(<4 x fl
 ; HASWELL-LABEL: test_movhlps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movhlps:
 ; BTVER2:       # BB#0:
@@ -1257,10 +1257,10 @@ define void @test_movhps(<4 x float> %a0
 ;
 ; HASWELL-LABEL: test_movhps:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movhps:
 ; BTVER2:       # BB#0:
@@ -1316,7 +1316,7 @@ define <4 x float> @test_movlhps(<4 x fl
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movlhps:
 ; BTVER2:       # BB#0:
@@ -1365,10 +1365,10 @@ define void @test_movlps(<4 x float> %a0
 ;
 ; HASWELL-LABEL: test_movlps:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movlps:
 ; BTVER2:       # BB#0:
@@ -1419,7 +1419,7 @@ define i32 @test_movmskps(<4 x float> %a
 ; HASWELL-LABEL: test_movmskps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovmskps %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movmskps:
 ; BTVER2:       # BB#0:
@@ -1465,7 +1465,7 @@ define void @test_movntps(<4 x float> %a
 ; HASWELL-LABEL: test_movntps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movntps:
 ; BTVER2:       # BB#0:
@@ -1511,10 +1511,10 @@ define void @test_movss_mem(float* %a0,
 ;
 ; HASWELL-LABEL: test_movss_mem:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movss_mem:
 ; BTVER2:       # BB#0:
@@ -1565,7 +1565,7 @@ define <4 x float> @test_movss_reg(<4 x
 ; HASWELL-LABEL: test_movss_reg:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movss_reg:
 ; BTVER2:       # BB#0:
@@ -1611,10 +1611,10 @@ define void @test_movups(<4 x float> *%a
 ;
 ; HASWELL-LABEL: test_movups:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovups (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT:    vmovups (%rdi), %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movups:
 ; BTVER2:       # BB#0:
@@ -1663,8 +1663,8 @@ define <4 x float> @test_mulps(<4 x floa
 ; HASWELL-LABEL: test_mulps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_mulps:
 ; BTVER2:       # BB#0:
@@ -1711,8 +1711,8 @@ define float @test_mulss(float %a0, floa
 ; HASWELL-LABEL: test_mulss:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_mulss:
 ; BTVER2:       # BB#0:
@@ -1763,8 +1763,8 @@ define <4 x float> @test_orps(<4 x float
 ; HASWELL-LABEL: test_orps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_orps:
 ; BTVER2:       # BB#0:
@@ -1816,8 +1816,8 @@ define void @test_prefetchnta(i8* %a0) {
 ;
 ; HASWELL-LABEL: test_prefetchnta:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    prefetchnta (%rdi) # sched: [4:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    prefetchnta (%rdi) # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_prefetchnta:
 ; BTVER2:       # BB#0:
@@ -1867,9 +1867,9 @@ define <4 x float> @test_rcpps(<4 x floa
 ; HASWELL-LABEL: test_rcpps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vrcpps (%rdi), %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT:    vrcpps (%rdi), %xmm1 # sched: [5:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_rcpps:
 ; BTVER2:       # BB#0:
@@ -1929,11 +1929,11 @@ define <4 x float> @test_rcpss(float %a0
 ;
 ; HASWELL-LABEL: test_rcpss:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
-; HASWELL-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; HASWELL-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_rcpss:
 ; BTVER2:       # BB#0:
@@ -1994,9 +1994,9 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; HASWELL-LABEL: test_rsqrtps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_rsqrtps:
 ; BTVER2:       # BB#0:
@@ -2057,10 +2057,10 @@ define <4 x float> @test_rsqrtss(float %
 ; HASWELL-LABEL: test_rsqrtss:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_rsqrtss:
 ; BTVER2:       # BB#0:
@@ -2116,8 +2116,8 @@ define void @test_sfence() {
 ;
 ; HASWELL-LABEL: test_sfence:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    sfence # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    sfence # sched: [1:0.33]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_sfence:
 ; BTVER2:       # BB#0:
@@ -2165,8 +2165,8 @@ define <4 x float> @test_shufps(<4 x flo
 ; HASWELL-LABEL: test_shufps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; HASWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_shufps:
 ; BTVER2:       # BB#0:
@@ -2217,10 +2217,10 @@ define <4 x float> @test_sqrtps(<4 x flo
 ;
 ; HASWELL-LABEL: test_sqrtps:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
-; HASWELL-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
+; HASWELL-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_sqrtps:
 ; BTVER2:       # BB#0:
@@ -2280,11 +2280,11 @@ define <4 x float> @test_sqrtss(<4 x flo
 ;
 ; HASWELL-LABEL: test_sqrtss:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
-; HASWELL-NEXT:    vmovaps (%rdi), %xmm1 # sched: [4:0.50]
-; HASWELL-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
+; HASWELL-NEXT:    vmovaps (%rdi), %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_sqrtss:
 ; BTVER2:       # BB#0:
@@ -2336,9 +2336,9 @@ define i32 @test_stmxcsr() {
 ;
 ; HASWELL-LABEL: test_stmxcsr:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; HASWELL-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; HASWELL-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_stmxcsr:
 ; BTVER2:       # BB#0:
@@ -2387,8 +2387,8 @@ define <4 x float> @test_subps(<4 x floa
 ; HASWELL-LABEL: test_subps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_subps:
 ; BTVER2:       # BB#0:
@@ -2435,8 +2435,8 @@ define float @test_subss(float %a0, floa
 ; HASWELL-LABEL: test_subss:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_subss:
 ; BTVER2:       # BB#0:
@@ -2524,7 +2524,7 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
 ; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ucomiss:
 ; BTVER2:       # BB#0:
@@ -2593,8 +2593,8 @@ define <4 x float> @test_unpckhps(<4 x f
 ; HASWELL-LABEL: test_unpckhps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_unpckhps:
 ; BTVER2:       # BB#0:
@@ -2645,8 +2645,8 @@ define <4 x float> @test_unpcklps(<4 x f
 ; HASWELL-LABEL: test_unpcklps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_unpcklps:
 ; BTVER2:       # BB#0:
@@ -2697,8 +2697,8 @@ define <4 x float> @test_xorps(<4 x floa
 ; HASWELL-LABEL: test_xorps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_xorps:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Mon Aug 28 03:04:16 2017
@@ -37,8 +37,8 @@ define <2 x double> @test_addpd(<2 x dou
 ; HASWELL-LABEL: test_addpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_addpd:
 ; BTVER2:       # BB#0:
@@ -85,8 +85,8 @@ define double @test_addsd(double %a0, do
 ; HASWELL-LABEL: test_addsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_addsd:
 ; BTVER2:       # BB#0:
@@ -137,9 +137,9 @@ define <2 x double> @test_andpd(<2 x dou
 ; HASWELL-LABEL: test_andpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_andpd:
 ; BTVER2:       # BB#0:
@@ -197,9 +197,9 @@ define <2 x double> @test_andnotpd(<2 x
 ; HASWELL-LABEL: test_andnotpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_andnotpd:
 ; BTVER2:       # BB#0:
@@ -259,9 +259,9 @@ define <2 x double> @test_cmppd(<2 x dou
 ; HASWELL-LABEL: test_cmppd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cmppd:
 ; BTVER2:       # BB#0:
@@ -314,7 +314,7 @@ define double @test_cmpsd(double %a0, do
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cmpsd:
 ; BTVER2:       # BB#0:
@@ -407,7 +407,7 @@ define i32 @test_comisd(<2 x double> %a0
 ; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
 ; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_comisd:
 ; BTVER2:       # BB#0:
@@ -476,9 +476,9 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; HASWELL-LABEL: test_cvtdq2pd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [4:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtdq2pd:
 ; BTVER2:       # BB#0:
@@ -534,10 +534,10 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ;
 ; HASWELL-LABEL: test_cvtdq2ps:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [3:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtdq2ps:
 ; BTVER2:       # BB#0:
@@ -592,9 +592,9 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ; HASWELL-LABEL: test_cvtpd2dq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtpd2dq:
 ; BTVER2:       # BB#0:
@@ -650,9 +650,9 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ; HASWELL-LABEL: test_cvtpd2ps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtpd2ps:
 ; BTVER2:       # BB#0:
@@ -708,9 +708,9 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; HASWELL-LABEL: test_cvtps2dq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00]
+; HASWELL-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [3:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtps2dq:
 ; BTVER2:       # BB#0:
@@ -766,9 +766,9 @@ define <2 x double> @test_cvtps2pd(<4 x
 ; HASWELL-LABEL: test_cvtps2pd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtps2pd:
 ; BTVER2:       # BB#0:
@@ -824,9 +824,9 @@ define i32 @test_cvtsd2si(double %a0, do
 ; HASWELL-LABEL: test_cvtsd2si:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtsd2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT:    vcvtsd2si (%rdi), %eax # sched: [4:1.00]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtsd2si:
 ; BTVER2:       # BB#0:
@@ -883,9 +883,9 @@ define i64 @test_cvtsd2siq(double %a0, d
 ; HASWELL-LABEL: test_cvtsd2siq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvtsd2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT:    vcvtsd2si (%rdi), %rax # sched: [4:1.00]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtsd2siq:
 ; BTVER2:       # BB#0:
@@ -947,10 +947,10 @@ define float @test_cvtsd2ss(double %a0,
 ; HASWELL-LABEL: test_cvtsd2ss:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50]
 ; HASWELL-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtsd2ss:
 ; BTVER2:       # BB#0:
@@ -1008,7 +1008,7 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; HASWELL-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
 ; HASWELL-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtsi2sd:
 ; BTVER2:       # BB#0:
@@ -1064,7 +1064,7 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; HASWELL-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
 ; HASWELL-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtsi2sdq:
 ; BTVER2:       # BB#0:
@@ -1125,10 +1125,10 @@ define double @test_cvtss2sd(float %a0,
 ; HASWELL-LABEL: test_cvtss2sd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvtss2sd:
 ; BTVER2:       # BB#0:
@@ -1185,9 +1185,9 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ; HASWELL-LABEL: test_cvttpd2dq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; HASWELL-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvttpd2dq:
 ; BTVER2:       # BB#0:
@@ -1244,9 +1244,9 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; HASWELL-LABEL: test_cvttps2dq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00]
+; HASWELL-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [3:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvttps2dq:
 ; BTVER2:       # BB#0:
@@ -1300,9 +1300,9 @@ define i32 @test_cvttsd2si(double %a0, d
 ; HASWELL-LABEL: test_cvttsd2si:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvttsd2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT:    vcvttsd2si (%rdi), %eax # sched: [4:1.00]
 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvttsd2si:
 ; BTVER2:       # BB#0:
@@ -1356,9 +1356,9 @@ define i64 @test_cvttsd2siq(double %a0,
 ; HASWELL-LABEL: test_cvttsd2siq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT:    vcvttsd2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT:    vcvttsd2si (%rdi), %rax # sched: [4:1.00]
 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_cvttsd2siq:
 ; BTVER2:       # BB#0:
@@ -1407,9 +1407,9 @@ define <2 x double> @test_divpd(<2 x dou
 ;
 ; HASWELL-LABEL: test_divpd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; HASWELL-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00]
+; HASWELL-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_divpd:
 ; BTVER2:       # BB#0:
@@ -1455,9 +1455,9 @@ define double @test_divsd(double %a0, do
 ;
 ; HASWELL-LABEL: test_divsd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; HASWELL-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00]
+; HASWELL-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_divsd:
 ; BTVER2:       # BB#0:
@@ -1505,8 +1505,8 @@ define void @test_lfence() {
 ;
 ; HASWELL-LABEL: test_lfence:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    lfence # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    lfence # sched: [2:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lfence:
 ; BTVER2:       # BB#0:
@@ -1551,8 +1551,8 @@ define void @test_mfence() {
 ;
 ; HASWELL-LABEL: test_mfence:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    mfence # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    mfence # sched: [2:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_mfence:
 ; BTVER2:       # BB#0:
@@ -1595,8 +1595,8 @@ define void @test_maskmovdqu(<16 x i8> %
 ;
 ; HASWELL-LABEL: test_maskmovdqu:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [14:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_maskmovdqu:
 ; BTVER2:       # BB#0:
@@ -1640,8 +1640,8 @@ define <2 x double> @test_maxpd(<2 x dou
 ; HASWELL-LABEL: test_maxpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_maxpd:
 ; BTVER2:       # BB#0:
@@ -1689,8 +1689,8 @@ define <2 x double> @test_maxsd(<2 x dou
 ; HASWELL-LABEL: test_maxsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_maxsd:
 ; BTVER2:       # BB#0:
@@ -1738,8 +1738,8 @@ define <2 x double> @test_minpd(<2 x dou
 ; HASWELL-LABEL: test_minpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_minpd:
 ; BTVER2:       # BB#0:
@@ -1787,8 +1787,8 @@ define <2 x double> @test_minsd(<2 x dou
 ; HASWELL-LABEL: test_minsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_minsd:
 ; BTVER2:       # BB#0:
@@ -1839,10 +1839,10 @@ define void @test_movapd(<2 x double> *%
 ;
 ; HASWELL-LABEL: test_movapd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovapd (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT:    vmovapd (%rdi), %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movapd:
 ; BTVER2:       # BB#0:
@@ -1894,10 +1894,10 @@ define void @test_movdqa(<2 x i64> *%a0,
 ;
 ; HASWELL-LABEL: test_movdqa:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movdqa:
 ; BTVER2:       # BB#0:
@@ -1949,10 +1949,10 @@ define void @test_movdqu(<2 x i64> *%a0,
 ;
 ; HASWELL-LABEL: test_movdqu:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movdqu:
 ; BTVER2:       # BB#0:
@@ -2017,12 +2017,12 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; HASWELL-LABEL: test_movd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
-; HASWELL-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vmovd %xmm0, %eax # sched: [1:1.00]
 ; HASWELL-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movd:
 ; BTVER2:       # BB#0:
@@ -2098,12 +2098,12 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; HASWELL-LABEL: test_movd_64:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
-; HASWELL-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vmovq %xmm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movd_64:
 ; BTVER2:       # BB#0:
@@ -2166,10 +2166,10 @@ define void @test_movhpd(<2 x double> %a
 ;
 ; HASWELL-LABEL: test_movhpd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movhpd:
 ; BTVER2:       # BB#0:
@@ -2224,10 +2224,10 @@ define void @test_movlpd(<2 x double> %a
 ;
 ; HASWELL-LABEL: test_movlpd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movlpd:
 ; BTVER2:       # BB#0:
@@ -2277,7 +2277,7 @@ define i32 @test_movmskpd(<2 x double> %
 ; HASWELL-LABEL: test_movmskpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movmskpd:
 ; BTVER2:       # BB#0:
@@ -2324,7 +2324,7 @@ define void @test_movntdqa(<2 x i64> %a0
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movntdqa:
 ; BTVER2:       # BB#0:
@@ -2371,7 +2371,7 @@ define void @test_movntpd(<2 x double> %
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movntpd:
 ; BTVER2:       # BB#0:
@@ -2420,10 +2420,10 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ;
 ; HASWELL-LABEL: test_movq_mem:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movq_mem:
 ; BTVER2:       # BB#0:
@@ -2477,7 +2477,7 @@ define <2 x i64> @test_movq_reg(<2 x i64
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movq_reg:
 ; BTVER2:       # BB#0:
@@ -2526,10 +2526,10 @@ define void @test_movsd_mem(double* %a0,
 ;
 ; HASWELL-LABEL: test_movsd_mem:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50]
+; HASWELL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50]
 ; HASWELL-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movsd_mem:
 ; BTVER2:       # BB#0:
@@ -2581,7 +2581,7 @@ define <2 x double> @test_movsd_reg(<2 x
 ; HASWELL-LABEL: test_movsd_reg:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movsd_reg:
 ; BTVER2:       # BB#0:
@@ -2627,10 +2627,10 @@ define void @test_movupd(<2 x double> *%
 ;
 ; HASWELL-LABEL: test_movupd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovupd (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT:    vmovupd (%rdi), %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movupd:
 ; BTVER2:       # BB#0:
@@ -2679,8 +2679,8 @@ define <2 x double> @test_mulpd(<2 x dou
 ; HASWELL-LABEL: test_mulpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_mulpd:
 ; BTVER2:       # BB#0:
@@ -2727,8 +2727,8 @@ define double @test_mulsd(double %a0, do
 ; HASWELL-LABEL: test_mulsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_mulsd:
 ; BTVER2:       # BB#0:
@@ -2779,9 +2779,9 @@ define <2 x double> @test_orpd(<2 x doub
 ; HASWELL-LABEL: test_orpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_orpd:
 ; BTVER2:       # BB#0:
@@ -2839,8 +2839,8 @@ define <8 x i16> @test_packssdw(<4 x i32
 ; HASWELL-LABEL: test_packssdw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_packssdw:
 ; BTVER2:       # BB#0:
@@ -2893,8 +2893,8 @@ define <16 x i8> @test_packsswb(<8 x i16
 ; HASWELL-LABEL: test_packsswb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_packsswb:
 ; BTVER2:       # BB#0:
@@ -2947,8 +2947,8 @@ define <16 x i8> @test_packuswb(<8 x i16
 ; HASWELL-LABEL: test_packuswb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_packuswb:
 ; BTVER2:       # BB#0:
@@ -3001,8 +3001,8 @@ define <16 x i8> @test_paddb(<16 x i8> %
 ; HASWELL-LABEL: test_paddb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_paddb:
 ; BTVER2:       # BB#0:
@@ -3053,8 +3053,8 @@ define <4 x i32> @test_paddd(<4 x i32> %
 ; HASWELL-LABEL: test_paddd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_paddd:
 ; BTVER2:       # BB#0:
@@ -3101,8 +3101,8 @@ define <2 x i64> @test_paddq(<2 x i64> %
 ; HASWELL-LABEL: test_paddq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_paddq:
 ; BTVER2:       # BB#0:
@@ -3153,8 +3153,8 @@ define <16 x i8> @test_paddsb(<16 x i8>
 ; HASWELL-LABEL: test_paddsb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_paddsb:
 ; BTVER2:       # BB#0:
@@ -3206,8 +3206,8 @@ define <8 x i16> @test_paddsw(<8 x i16>
 ; HASWELL-LABEL: test_paddsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_paddsw:
 ; BTVER2:       # BB#0:
@@ -3259,8 +3259,8 @@ define <16 x i8> @test_paddusb(<16 x i8>
 ; HASWELL-LABEL: test_paddusb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_paddusb:
 ; BTVER2:       # BB#0:
@@ -3312,8 +3312,8 @@ define <8 x i16> @test_paddusw(<8 x i16>
 ; HASWELL-LABEL: test_paddusw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_paddusw:
 ; BTVER2:       # BB#0:
@@ -3365,8 +3365,8 @@ define <8 x i16> @test_paddw(<8 x i16> %
 ; HASWELL-LABEL: test_paddw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_paddw:
 ; BTVER2:       # BB#0:
@@ -3417,9 +3417,9 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; HASWELL-LABEL: test_pand:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pand:
 ; BTVER2:       # BB#0:
@@ -3479,9 +3479,9 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; HASWELL-LABEL: test_pandn:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50]
+; HASWELL-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pandn:
 ; BTVER2:       # BB#0:
@@ -3537,8 +3537,8 @@ define <16 x i8> @test_pavgb(<16 x i8> %
 ; HASWELL-LABEL: test_pavgb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pavgb:
 ; BTVER2:       # BB#0:
@@ -3590,8 +3590,8 @@ define <8 x i16> @test_pavgw(<8 x i16> %
 ; HASWELL-LABEL: test_pavgw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pavgw:
 ; BTVER2:       # BB#0:
@@ -3645,9 +3645,9 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 ; HASWELL-LABEL: test_pcmpeqb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpeqb:
 ; BTVER2:       # BB#0:
@@ -3704,9 +3704,9 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 ; HASWELL-LABEL: test_pcmpeqd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpeqd:
 ; BTVER2:       # BB#0:
@@ -3763,9 +3763,9 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 ; HASWELL-LABEL: test_pcmpeqw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpeqw:
 ; BTVER2:       # BB#0:
@@ -3823,9 +3823,9 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 ; HASWELL-LABEL: test_pcmpgtb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpgtb:
 ; BTVER2:       # BB#0:
@@ -3883,9 +3883,9 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 ; HASWELL-LABEL: test_pcmpgtd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpgtd:
 ; BTVER2:       # BB#0:
@@ -3943,9 +3943,9 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 ; HASWELL-LABEL: test_pcmpgtw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpgtw:
 ; BTVER2:       # BB#0:
@@ -3995,9 +3995,9 @@ define i16 @test_pextrw(<8 x i16> %a0) {
 ;
 ; HASWELL-LABEL: test_pextrw:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpextrw $6, %xmm0, %eax # sched: [1:1.00]
+; HASWELL-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:1.00]
 ; HASWELL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pextrw:
 ; BTVER2:       # BB#0:
@@ -4045,9 +4045,9 @@ define <8 x i16> @test_pinsrw(<8 x i16>
 ;
 ; HASWELL-LABEL: test_pinsrw:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
+; HASWELL-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pinsrw:
 ; BTVER2:       # BB#0:
@@ -4102,8 +4102,8 @@ define <4 x i32> @test_pmaddwd(<8 x i16>
 ; HASWELL-LABEL: test_pmaddwd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmaddwd:
 ; BTVER2:       # BB#0:
@@ -4156,8 +4156,8 @@ define <8 x i16> @test_pmaxsw(<8 x i16>
 ; HASWELL-LABEL: test_pmaxsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmaxsw:
 ; BTVER2:       # BB#0:
@@ -4209,8 +4209,8 @@ define <16 x i8> @test_pmaxub(<16 x i8>
 ; HASWELL-LABEL: test_pmaxub:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmaxub:
 ; BTVER2:       # BB#0:
@@ -4262,8 +4262,8 @@ define <8 x i16> @test_pminsw(<8 x i16>
 ; HASWELL-LABEL: test_pminsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pminsw:
 ; BTVER2:       # BB#0:
@@ -4315,8 +4315,8 @@ define <16 x i8> @test_pminub(<16 x i8>
 ; HASWELL-LABEL: test_pminub:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pminub:
 ; BTVER2:       # BB#0:
@@ -4362,7 +4362,7 @@ define i32 @test_pmovmskb(<16 x i8> %a0)
 ; HASWELL-LABEL: test_pmovmskb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovmskb:
 ; BTVER2:       # BB#0:
@@ -4406,8 +4406,8 @@ define <8 x i16> @test_pmulhuw(<8 x i16>
 ; HASWELL-LABEL: test_pmulhuw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmulhuw:
 ; BTVER2:       # BB#0:
@@ -4455,8 +4455,8 @@ define <8 x i16> @test_pmulhw(<8 x i16>
 ; HASWELL-LABEL: test_pmulhw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmulhw:
 ; BTVER2:       # BB#0:
@@ -4504,8 +4504,8 @@ define <8 x i16> @test_pmullw(<8 x i16>
 ; HASWELL-LABEL: test_pmullw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmullw:
 ; BTVER2:       # BB#0:
@@ -4560,8 +4560,8 @@ define <2 x i64> @test_pmuludq(<4 x i32>
 ; HASWELL-LABEL: test_pmuludq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmuludq:
 ; BTVER2:       # BB#0:
@@ -4614,9 +4614,9 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; HASWELL-LABEL: test_por:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_por:
 ; BTVER2:       # BB#0:
@@ -4674,8 +4674,8 @@ define <2 x i64> @test_psadbw(<16 x i8>
 ; HASWELL-LABEL: test_psadbw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psadbw:
 ; BTVER2:       # BB#0:
@@ -4730,9 +4730,9 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; HASWELL-LABEL: test_pshufd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; HASWELL-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00]
+; HASWELL-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pshufd:
 ; BTVER2:       # BB#0:
@@ -4788,9 +4788,9 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ; HASWELL-LABEL: test_pshufhw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; HASWELL-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:1.00]
+; HASWELL-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pshufhw:
 ; BTVER2:       # BB#0:
@@ -4846,9 +4846,9 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; HASWELL-LABEL: test_pshuflw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; HASWELL-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:1.00]
+; HASWELL-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pshuflw:
 ; BTVER2:       # BB#0:
@@ -4902,9 +4902,9 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ; HASWELL-LABEL: test_pslld:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pslld:
 ; BTVER2:       # BB#0:
@@ -4958,7 +4958,7 @@ define <4 x i32> @test_pslldq(<4 x i32>
 ; HASWELL-LABEL: test_pslldq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pslldq:
 ; BTVER2:       # BB#0:
@@ -5005,9 +5005,9 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ; HASWELL-LABEL: test_psllq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psllq:
 ; BTVER2:       # BB#0:
@@ -5063,9 +5063,9 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ; HASWELL-LABEL: test_psllw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psllw:
 ; BTVER2:       # BB#0:
@@ -5121,9 +5121,9 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ; HASWELL-LABEL: test_psrad:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psrad:
 ; BTVER2:       # BB#0:
@@ -5179,9 +5179,9 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ; HASWELL-LABEL: test_psraw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psraw:
 ; BTVER2:       # BB#0:
@@ -5237,9 +5237,9 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ; HASWELL-LABEL: test_psrld:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psrld:
 ; BTVER2:       # BB#0:
@@ -5293,7 +5293,7 @@ define <4 x i32> @test_psrldq(<4 x i32>
 ; HASWELL-LABEL: test_psrldq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psrldq:
 ; BTVER2:       # BB#0:
@@ -5340,9 +5340,9 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ; HASWELL-LABEL: test_psrlq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psrlq:
 ; BTVER2:       # BB#0:
@@ -5398,9 +5398,9 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ; HASWELL-LABEL: test_psrlw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psrlw:
 ; BTVER2:       # BB#0:
@@ -5456,8 +5456,8 @@ define <16 x i8> @test_psubb(<16 x i8> %
 ; HASWELL-LABEL: test_psubb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psubb:
 ; BTVER2:       # BB#0:
@@ -5508,8 +5508,8 @@ define <4 x i32> @test_psubd(<4 x i32> %
 ; HASWELL-LABEL: test_psubd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psubd:
 ; BTVER2:       # BB#0:
@@ -5556,8 +5556,8 @@ define <2 x i64> @test_psubq(<2 x i64> %
 ; HASWELL-LABEL: test_psubq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psubq:
 ; BTVER2:       # BB#0:
@@ -5608,8 +5608,8 @@ define <16 x i8> @test_psubsb(<16 x i8>
 ; HASWELL-LABEL: test_psubsb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psubsb:
 ; BTVER2:       # BB#0:
@@ -5661,8 +5661,8 @@ define <8 x i16> @test_psubsw(<8 x i16>
 ; HASWELL-LABEL: test_psubsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psubsw:
 ; BTVER2:       # BB#0:
@@ -5714,8 +5714,8 @@ define <16 x i8> @test_psubusb(<16 x i8>
 ; HASWELL-LABEL: test_psubusb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psubusb:
 ; BTVER2:       # BB#0:
@@ -5767,8 +5767,8 @@ define <8 x i16> @test_psubusw(<8 x i16>
 ; HASWELL-LABEL: test_psubusw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psubusw:
 ; BTVER2:       # BB#0:
@@ -5820,8 +5820,8 @@ define <8 x i16> @test_psubw(<8 x i16> %
 ; HASWELL-LABEL: test_psubw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psubw:
 ; BTVER2:       # BB#0:
@@ -5872,8 +5872,8 @@ define <16 x i8> @test_punpckhbw(<16 x i
 ; HASWELL-LABEL: test_punpckhbw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_punpckhbw:
 ; BTVER2:       # BB#0:
@@ -5926,9 +5926,9 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; HASWELL-LABEL: test_punpckhdq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:1.00]
+; HASWELL-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_punpckhdq:
 ; BTVER2:       # BB#0:
@@ -5982,9 +5982,9 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ; HASWELL-LABEL: test_punpckhqdq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00]
+; HASWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_punpckhqdq:
 ; BTVER2:       # BB#0:
@@ -6038,8 +6038,8 @@ define <8 x i16> @test_punpckhwd(<8 x i1
 ; HASWELL-LABEL: test_punpckhwd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_punpckhwd:
 ; BTVER2:       # BB#0:
@@ -6090,8 +6090,8 @@ define <16 x i8> @test_punpcklbw(<16 x i
 ; HASWELL-LABEL: test_punpcklbw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_punpcklbw:
 ; BTVER2:       # BB#0:
@@ -6144,9 +6144,9 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; HASWELL-LABEL: test_punpckldq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:1.00]
+; HASWELL-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_punpckldq:
 ; BTVER2:       # BB#0:
@@ -6200,9 +6200,9 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; HASWELL-LABEL: test_punpcklqdq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_punpcklqdq:
 ; BTVER2:       # BB#0:
@@ -6256,8 +6256,8 @@ define <8 x i16> @test_punpcklwd(<8 x i1
 ; HASWELL-LABEL: test_punpcklwd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_punpcklwd:
 ; BTVER2:       # BB#0:
@@ -6308,9 +6308,9 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; HASWELL-LABEL: test_pxor:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pxor:
 ; BTVER2:       # BB#0:
@@ -6364,9 +6364,9 @@ define <2 x double> @test_shufpd(<2 x do
 ; HASWELL-LABEL: test_shufpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00]
+; HASWELL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_shufpd:
 ; BTVER2:       # BB#0:
@@ -6420,10 +6420,10 @@ define <2 x double> @test_sqrtpd(<2 x do
 ;
 ; HASWELL-LABEL: test_sqrtpd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [15:1.00]
-; HASWELL-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
+; HASWELL-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [21:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_sqrtpd:
 ; BTVER2:       # BB#0:
@@ -6483,11 +6483,11 @@ define <2 x double> @test_sqrtsd(<2 x do
 ;
 ; HASWELL-LABEL: test_sqrtsd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
-; HASWELL-NEXT:    vmovapd (%rdi), %xmm1 # sched: [4:0.50]
-; HASWELL-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
+; HASWELL-NEXT:    vmovapd (%rdi), %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_sqrtsd:
 ; BTVER2:       # BB#0:
@@ -6540,8 +6540,8 @@ define <2 x double> @test_subpd(<2 x dou
 ; HASWELL-LABEL: test_subpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_subpd:
 ; BTVER2:       # BB#0:
@@ -6588,8 +6588,8 @@ define double @test_subsd(double %a0, do
 ; HASWELL-LABEL: test_subsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_subsd:
 ; BTVER2:       # BB#0:
@@ -6677,7 +6677,7 @@ define i32 @test_ucomisd(<2 x double> %a
 ; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
 ; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ucomisd:
 ; BTVER2:       # BB#0:
@@ -6746,9 +6746,9 @@ define <2 x double> @test_unpckhpd(<2 x
 ; HASWELL-LABEL: test_unpckhpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00]
+; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_unpckhpd:
 ; BTVER2:       # BB#0:
@@ -6808,9 +6808,9 @@ define <2 x double> @test_unpcklpd(<2 x
 ; HASWELL-LABEL: test_unpcklpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_unpcklpd:
 ; BTVER2:       # BB#0:
@@ -6864,9 +6864,9 @@ define <2 x double> @test_xorpd(<2 x dou
 ; HASWELL-LABEL: test_xorpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_xorpd:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Mon Aug 28 03:04:16 2017
@@ -37,8 +37,8 @@ define <2 x double> @test_addsubpd(<2 x
 ; HASWELL-LABEL: test_addsubpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_addsubpd:
 ; BTVER2:       # BB#0:
@@ -86,8 +86,8 @@ define <4 x float> @test_addsubps(<4 x f
 ; HASWELL-LABEL: test_addsubps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_addsubps:
 ; BTVER2:       # BB#0:
@@ -135,8 +135,8 @@ define <2 x double> @test_haddpd(<2 x do
 ; HASWELL-LABEL: test_haddpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_haddpd:
 ; BTVER2:       # BB#0:
@@ -184,8 +184,8 @@ define <4 x float> @test_haddps(<4 x flo
 ; HASWELL-LABEL: test_haddps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_haddps:
 ; BTVER2:       # BB#0:
@@ -233,8 +233,8 @@ define <2 x double> @test_hsubpd(<2 x do
 ; HASWELL-LABEL: test_hsubpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_hsubpd:
 ; BTVER2:       # BB#0:
@@ -282,8 +282,8 @@ define <4 x float> @test_hsubps(<4 x flo
 ; HASWELL-LABEL: test_hsubps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_hsubps:
 ; BTVER2:       # BB#0:
@@ -328,8 +328,8 @@ define <16 x i8> @test_lddqu(i8* %a0) {
 ;
 ; HASWELL-LABEL: test_lddqu:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [4:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_lddqu:
 ; BTVER2:       # BB#0:
@@ -379,7 +379,7 @@ define void @test_monitor(i8* %a0, i32 %
 ; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
 ; HASWELL-NEXT:    monitor # sched: [100:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_monitor:
 ; BTVER2:       # BB#0:
@@ -432,9 +432,9 @@ define <2 x double> @test_movddup(<2 x d
 ; HASWELL-LABEL: test_movddup:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; HASWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50]
+; HASWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movddup:
 ; BTVER2:       # BB#0:
@@ -489,9 +489,9 @@ define <4 x float> @test_movshdup(<4 x f
 ; HASWELL-LABEL: test_movshdup:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50]
+; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movshdup:
 ; BTVER2:       # BB#0:
@@ -546,9 +546,9 @@ define <4 x float> @test_movsldup(<4 x f
 ; HASWELL-LABEL: test_movsldup:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50]
+; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movsldup:
 ; BTVER2:       # BB#0:
@@ -603,8 +603,8 @@ define void @test_mwait(i32 %a0, i32 %a1
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; HASWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    mwait # sched: [100:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    mwait # sched: [20:2.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_mwait:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Mon Aug 28 03:04:16 2017
@@ -34,8 +34,8 @@ define <2 x double> @test_blendpd(<2 x d
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blendpd:
 ; BTVER2:       # BB#0:
@@ -79,8 +79,8 @@ define <4 x float> @test_blendps(<4 x fl
 ; HASWELL-LABEL: test_blendps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blendps:
 ; BTVER2:       # BB#0:
@@ -127,8 +127,8 @@ define <2 x double> @test_blendvpd(<2 x
 ; HASWELL-LABEL: test_blendvpd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blendvpd:
 ; BTVER2:       # BB#0:
@@ -176,8 +176,8 @@ define <4 x float> @test_blendvps(<4 x f
 ; HASWELL-LABEL: test_blendvps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_blendvps:
 ; BTVER2:       # BB#0:
@@ -219,8 +219,8 @@ define <2 x double> @test_dppd(<2 x doub
 ; HASWELL-LABEL: test_dppd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_dppd:
 ; BTVER2:       # BB#0:
@@ -262,8 +262,8 @@ define <4 x float> @test_dpps(<4 x float
 ; HASWELL-LABEL: test_dpps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
-; HASWELL-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_dpps:
 ; BTVER2:       # BB#0:
@@ -305,8 +305,8 @@ define <4 x float> @test_insertps(<4 x f
 ; HASWELL-LABEL: test_insertps:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; HASWELL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_insertps:
 ; BTVER2:       # BB#0:
@@ -344,8 +344,8 @@ define <2 x i64> @test_movntdqa(i8* %a0)
 ;
 ; HASWELL-LABEL: test_movntdqa:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [4:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movntdqa:
 ; BTVER2:       # BB#0:
@@ -382,9 +382,9 @@ define <8 x i16> @test_mpsadbw(<16 x i8>
 ;
 ; HASWELL-LABEL: test_mpsadbw:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
+; HASWELL-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_mpsadbw:
 ; BTVER2:       # BB#0:
@@ -427,8 +427,8 @@ define <8 x i16> @test_packusdw(<4 x i32
 ; HASWELL-LABEL: test_packusdw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_packusdw:
 ; BTVER2:       # BB#0:
@@ -477,8 +477,8 @@ define <16 x i8> @test_pblendvb(<16 x i8
 ; HASWELL-LABEL: test_pblendvb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pblendvb:
 ; BTVER2:       # BB#0:
@@ -521,7 +521,7 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
 ; HASWELL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pblendw:
 ; BTVER2:       # BB#0:
@@ -562,8 +562,8 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
 ; HASWELL-LABEL: test_pcmpeqq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpeqq:
 ; BTVER2:       # BB#0:
@@ -605,9 +605,9 @@ define i32 @test_pextrb(<16 x i8> %a0, i
 ;
 ; HASWELL-LABEL: test_pextrb:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpextrb $3, %xmm0, %eax # sched: [1:1.00]
-; HASWELL-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpextrb $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pextrb:
 ; BTVER2:       # BB#0:
@@ -648,9 +648,9 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ;
 ; HASWELL-LABEL: test_pextrd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpextrd $3, %xmm0, %eax # sched: [1:1.00]
-; HASWELL-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpextrd $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pextrd:
 ; BTVER2:       # BB#0:
@@ -690,9 +690,9 @@ define i64 @test_pextrq(<2 x i64> %a0, <
 ;
 ; HASWELL-LABEL: test_pextrq:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpextrq $1, %xmm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpextrq $1, %xmm0, %rax # sched: [2:1.00]
+; HASWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pextrq:
 ; BTVER2:       # BB#0:
@@ -732,9 +732,9 @@ define i32 @test_pextrw(<8 x i16> %a0, i
 ;
 ; HASWELL-LABEL: test_pextrw:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpextrw $3, %xmm0, %eax # sched: [1:1.00]
-; HASWELL-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpextrw $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pextrw:
 ; BTVER2:       # BB#0:
@@ -775,9 +775,9 @@ define <8 x i16> @test_phminposuw(<8 x i
 ;
 ; HASWELL-LABEL: test_phminposuw:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_phminposuw:
 ; BTVER2:       # BB#0:
@@ -818,9 +818,9 @@ define <16 x i8> @test_pinsrb(<16 x i8>
 ;
 ; HASWELL-LABEL: test_pinsrb:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
+; HASWELL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pinsrb:
 ; BTVER2:       # BB#0:
@@ -860,9 +860,9 @@ define <4 x i32> @test_pinsrd(<4 x i32>
 ;
 ; HASWELL-LABEL: test_pinsrd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
+; HASWELL-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pinsrd:
 ; BTVER2:       # BB#0:
@@ -905,10 +905,10 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ;
 ; HASWELL-LABEL: test_pinsrq:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
+; HASWELL-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pinsrq:
 ; BTVER2:       # BB#0:
@@ -952,8 +952,8 @@ define <16 x i8> @test_pmaxsb(<16 x i8>
 ; HASWELL-LABEL: test_pmaxsb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmaxsb:
 ; BTVER2:       # BB#0:
@@ -995,8 +995,8 @@ define <4 x i32> @test_pmaxsd(<4 x i32>
 ; HASWELL-LABEL: test_pmaxsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmaxsd:
 ; BTVER2:       # BB#0:
@@ -1038,8 +1038,8 @@ define <4 x i32> @test_pmaxud(<4 x i32>
 ; HASWELL-LABEL: test_pmaxud:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmaxud:
 ; BTVER2:       # BB#0:
@@ -1081,8 +1081,8 @@ define <8 x i16> @test_pmaxuw(<8 x i16>
 ; HASWELL-LABEL: test_pmaxuw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmaxuw:
 ; BTVER2:       # BB#0:
@@ -1124,8 +1124,8 @@ define <16 x i8> @test_pminsb(<16 x i8>
 ; HASWELL-LABEL: test_pminsb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pminsb:
 ; BTVER2:       # BB#0:
@@ -1167,8 +1167,8 @@ define <4 x i32> @test_pminsd(<4 x i32>
 ; HASWELL-LABEL: test_pminsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pminsd:
 ; BTVER2:       # BB#0:
@@ -1210,8 +1210,8 @@ define <4 x i32> @test_pminud(<4 x i32>
 ; HASWELL-LABEL: test_pminud:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pminud:
 ; BTVER2:       # BB#0:
@@ -1253,8 +1253,8 @@ define <8 x i16> @test_pminuw(<8 x i16>
 ; HASWELL-LABEL: test_pminuw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pminuw:
 ; BTVER2:       # BB#0:
@@ -1300,9 +1300,9 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; HASWELL-LABEL: test_pmovsxbw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovsxbw:
 ; BTVER2:       # BB#0:
@@ -1351,9 +1351,9 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; HASWELL-LABEL: test_pmovsxbd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovsxbd:
 ; BTVER2:       # BB#0:
@@ -1402,9 +1402,9 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; HASWELL-LABEL: test_pmovsxbq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovsxbq:
 ; BTVER2:       # BB#0:
@@ -1453,9 +1453,9 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; HASWELL-LABEL: test_pmovsxdq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovsxdq:
 ; BTVER2:       # BB#0:
@@ -1504,9 +1504,9 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; HASWELL-LABEL: test_pmovsxwd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovsxwd:
 ; BTVER2:       # BB#0:
@@ -1555,9 +1555,9 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; HASWELL-LABEL: test_pmovsxwq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovsxwq:
 ; BTVER2:       # BB#0:
@@ -1606,9 +1606,9 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; HASWELL-LABEL: test_pmovzxbw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; HASWELL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
+; HASWELL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovzxbw:
 ; BTVER2:       # BB#0:
@@ -1657,9 +1657,9 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; HASWELL-LABEL: test_pmovzxbd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
+; HASWELL-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovzxbd:
 ; BTVER2:       # BB#0:
@@ -1708,9 +1708,9 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; HASWELL-LABEL: test_pmovzxbq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
+; HASWELL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovzxbq:
 ; BTVER2:       # BB#0:
@@ -1759,9 +1759,9 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; HASWELL-LABEL: test_pmovzxdq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; HASWELL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:1.00]
+; HASWELL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovzxdq:
 ; BTVER2:       # BB#0:
@@ -1810,9 +1810,9 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; HASWELL-LABEL: test_pmovzxwd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; HASWELL-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
+; HASWELL-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovzxwd:
 ; BTVER2:       # BB#0:
@@ -1861,9 +1861,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; HASWELL-LABEL: test_pmovzxwq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00]
+; HASWELL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00]
 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmovzxwq:
 ; BTVER2:       # BB#0:
@@ -1908,8 +1908,8 @@ define <2 x i64> @test_pmuldq(<4 x i32>
 ; HASWELL-LABEL: test_pmuldq:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmuldq:
 ; BTVER2:       # BB#0:
@@ -1953,7 +1953,7 @@ define <4 x i32> @test_pmulld(<4 x i32>
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
 ; HASWELL-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmulld:
 ; BTVER2:       # BB#0:
@@ -2011,7 +2011,7 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; HASWELL-NEXT:    setb %cl # sched: [1:0.50]
 ; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
 ; HASWELL-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_ptest:
 ; BTVER2:       # BB#0:
@@ -2065,10 +2065,10 @@ define <2 x double> @test_roundpd(<2 x d
 ;
 ; HASWELL-LABEL: test_roundpd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [10:2.00]
+; HASWELL-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [5:1.25]
+; HASWELL-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [6:2.00]
 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_roundpd:
 ; BTVER2:       # BB#0:
@@ -2116,10 +2116,10 @@ define <4 x float> @test_roundps(<4 x fl
 ;
 ; HASWELL-LABEL: test_roundps:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [10:2.00]
+; HASWELL-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [5:1.25]
+; HASWELL-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [6:2.00]
 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_roundps:
 ; BTVER2:       # BB#0:
@@ -2168,10 +2168,10 @@ define <2 x double> @test_roundsd(<2 x d
 ;
 ; HASWELL-LABEL: test_roundsd:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00]
-; HASWELL-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
+; HASWELL-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25]
+; HASWELL-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_roundsd:
 ; BTVER2:       # BB#0:
@@ -2220,10 +2220,10 @@ define <4 x float> @test_roundss(<4 x fl
 ;
 ; HASWELL-LABEL: test_roundss:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00]
-; HASWELL-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
+; HASWELL-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25]
+; HASWELL-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_roundss:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Mon Aug 28 03:04:16 2017
@@ -35,7 +35,7 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; HASWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
 ; HASWELL-NEXT:    crc32b (%rdx), %edi # sched: [7:1.00]
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: crc32_32_8:
 ; BTVER2:       # BB#0:
@@ -84,7 +84,7 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; HASWELL-NEXT:    crc32w %si, %edi # sched: [3:1.00]
 ; HASWELL-NEXT:    crc32w (%rdx), %edi # sched: [7:1.00]
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: crc32_32_16:
 ; BTVER2:       # BB#0:
@@ -133,7 +133,7 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; HASWELL-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
 ; HASWELL-NEXT:    crc32l (%rdx), %edi # sched: [7:1.00]
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: crc32_32_32:
 ; BTVER2:       # BB#0:
@@ -182,7 +182,7 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; HASWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
 ; HASWELL-NEXT:    crc32b (%rdx), %edi # sched: [7:1.00]
 ; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: crc32_64_8:
 ; BTVER2:       # BB#0:
@@ -231,7 +231,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; HASWELL-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
 ; HASWELL-NEXT:    crc32q (%rdx), %rdi # sched: [7:1.00]
 ; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: crc32_64_64:
 ; BTVER2:       # BB#0:
@@ -297,14 +297,14 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    movl $7, %edx # sched: [1:0.25]
-; HASWELL-NEXT:    vpcmpestri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; HASWELL-NEXT:    vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
 ; HASWELL-NEXT:    movl %ecx, %esi # sched: [1:0.25]
 ; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    movl $7, %edx # sched: [1:0.25]
-; HASWELL-NEXT:    vpcmpestri $7, (%rdi), %xmm0 # sched: [11:3.00]
+; HASWELL-NEXT:    vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00]
 ; HASWELL-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
 ; HASWELL-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpestri:
 ; BTVER2:       # BB#0:
@@ -374,11 +374,11 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    movl $7, %edx # sched: [1:0.25]
-; HASWELL-NEXT:    vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00]
+; HASWELL-NEXT:    vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
 ; HASWELL-NEXT:    movl $7, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    movl $7, %edx # sched: [1:0.25]
-; HASWELL-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [10:3.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpestrm:
 ; BTVER2:       # BB#0:
@@ -441,7 +441,7 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ; HASWELL-NEXT:    vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00]
 ; HASWELL-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
 ; HASWELL-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpistri:
 ; BTVER2:       # BB#0:
@@ -489,9 +489,9 @@ define <16 x i8> @test_pcmpistrm(<16 x i
 ;
 ; HASWELL-LABEL: test_pcmpistrm:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
-; HASWELL-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; HASWELL-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpistrm:
 ; BTVER2:       # BB#0:
@@ -534,7 +534,7 @@ define <2 x i64> @test_pcmpgtq(<2 x i64>
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpgtq:
 ; BTVER2:       # BB#0:
@@ -576,9 +576,9 @@ define <2 x i64> @test_pclmulqdq(<2 x i6
 ;
 ; HASWELL-LABEL: test_pclmulqdq:
 ; HASWELL:       # BB#0:
-; HASWELL-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
-; HASWELL-NEXT:    vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00]
+; HASWELL-NEXT:    vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pclmulqdq:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll Mon Aug 28 03:04:16 2017
@@ -42,9 +42,9 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; HASWELL-LABEL: test_pabsb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpabsb (%rdi), %xmm1 # sched: [5:0.50]
+; HASWELL-NEXT:    vpabsb (%rdi), %xmm1 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pabsb:
 ; BTVER2:       # BB#0:
@@ -100,9 +100,9 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; HASWELL-LABEL: test_pabsd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpabsd (%rdi), %xmm1 # sched: [5:0.50]
+; HASWELL-NEXT:    vpabsd (%rdi), %xmm1 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pabsd:
 ; BTVER2:       # BB#0:
@@ -158,9 +158,9 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; HASWELL-LABEL: test_pabsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpabsw (%rdi), %xmm1 # sched: [5:0.50]
+; HASWELL-NEXT:    vpabsw (%rdi), %xmm1 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pabsw:
 ; BTVER2:       # BB#0:
@@ -216,8 +216,8 @@ define <8 x i16> @test_palignr(<8 x i16>
 ; HASWELL-LABEL: test_palignr:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; HASWELL-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_palignr:
 ; BTVER2:       # BB#0:
@@ -264,8 +264,8 @@ define <4 x i32> @test_phaddd(<4 x i32>
 ; HASWELL-LABEL: test_phaddd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_phaddd:
 ; BTVER2:       # BB#0:
@@ -313,8 +313,8 @@ define <8 x i16> @test_phaddsw(<8 x i16>
 ; HASWELL-LABEL: test_phaddsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_phaddsw:
 ; BTVER2:       # BB#0:
@@ -362,8 +362,8 @@ define <8 x i16> @test_phaddw(<8 x i16>
 ; HASWELL-LABEL: test_phaddw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_phaddw:
 ; BTVER2:       # BB#0:
@@ -411,8 +411,8 @@ define <4 x i32> @test_phsubd(<4 x i32>
 ; HASWELL-LABEL: test_phsubd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_phsubd:
 ; BTVER2:       # BB#0:
@@ -460,8 +460,8 @@ define <8 x i16> @test_phsubsw(<8 x i16>
 ; HASWELL-LABEL: test_phsubsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_phsubsw:
 ; BTVER2:       # BB#0:
@@ -509,8 +509,8 @@ define <8 x i16> @test_phsubw(<8 x i16>
 ; HASWELL-LABEL: test_phsubw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_phsubw:
 ; BTVER2:       # BB#0:
@@ -558,8 +558,8 @@ define <8 x i16> @test_pmaddubsw(<16 x i
 ; HASWELL-LABEL: test_pmaddubsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmaddubsw:
 ; BTVER2:       # BB#0:
@@ -608,8 +608,8 @@ define <8 x i16> @test_pmulhrsw(<8 x i16
 ; HASWELL-LABEL: test_pmulhrsw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pmulhrsw:
 ; BTVER2:       # BB#0:
@@ -657,8 +657,8 @@ define <16 x i8> @test_pshufb(<16 x i8>
 ; HASWELL-LABEL: test_pshufb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_pshufb:
 ; BTVER2:       # BB#0:
@@ -710,8 +710,8 @@ define <16 x i8> @test_psignb(<16 x i8>
 ; HASWELL-LABEL: test_psignb:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psignb:
 ; BTVER2:       # BB#0:
@@ -763,8 +763,8 @@ define <4 x i32> @test_psignd(<4 x i32>
 ; HASWELL-LABEL: test_psignd:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psignd:
 ; BTVER2:       # BB#0:
@@ -816,8 +816,8 @@ define <8 x i16> @test_psignw(<8 x i16>
 ; HASWELL-LABEL: test_psignw:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT:    retq # sched: [1:1.00]
+; HASWELL-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_psignw:
 ; BTVER2:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll Mon Aug 28 03:04:16 2017
@@ -201,14 +201,14 @@ define <64 x i8> @splatvar_shift_v64i8(<
 ; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm5
 ; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm8
 ; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512DQ-NEXT:    vpsraw $4, %ymm5, %ymm9
+; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
 ; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm5
 ; AVX512DQ-NEXT:    vpaddw %ymm8, %ymm8, %ymm9
 ; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
-; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
 ; AVX512DQ-NEXT:    vpsraw $2, %ymm3, %ymm4
 ; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
 ; AVX512DQ-NEXT:    vpsraw $1, %ymm3, %ymm4
@@ -328,14 +328,14 @@ define <64 x i8> @constant_shift_v64i8(<
 ; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm5
 ; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm8
 ; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512DQ-NEXT:    vpsraw $4, %ymm5, %ymm9
+; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
 ; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm5
 ; AVX512DQ-NEXT:    vpaddw %ymm8, %ymm8, %ymm9
 ; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
-; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
 ; AVX512DQ-NEXT:    vpsraw $2, %ymm3, %ymm4
 ; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
 ; AVX512DQ-NEXT:    vpsraw $1, %ymm3, %ymm4

Modified: llvm/trunk/test/CodeGen/X86/vector-shift-lshr-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-lshr-256.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-lshr-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-lshr-256.ll Mon Aug 28 03:04:16 2017
@@ -777,9 +777,9 @@ define <32 x i8> @splatvar_shift_v32i8(<
 ;
 ; AVX512DQ-LABEL: splatvar_shift_v32i8:
 ; AVX512DQ:       # BB#0:
-; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm2
 ; AVX512DQ-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512DQ-NEXT:    vpsllw $5, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm2

Modified: llvm/trunk/test/CodeGen/X86/vector-shift-shl-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-shl-256.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-shl-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-shl-256.ll Mon Aug 28 03:04:16 2017
@@ -713,9 +713,9 @@ define <32 x i8> @splatvar_shift_v32i8(<
 ;
 ; AVX512DQ-LABEL: splatvar_shift_v32i8:
 ; AVX512DQ:       # BB#0:
-; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
 ; AVX512DQ-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512DQ-NEXT:    vpsllw $5, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll?rev=311879&r1=311878&r2=311879&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll Mon Aug 28 03:04:16 2017
@@ -68,13 +68,13 @@ define <32 x i16> @shuffle_v32i16_0f_1f_
 ; KNL-NEXT:    vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1]
 ; KNL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3]
 ; KNL-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7]
-; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm1
-; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm5
-; KNL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7]
-; KNL-NEXT:    vpshufb {{.*#+}} xmm5 = xmm5[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
-; KNL-NEXT:    vinserti128 $1, %xmm5, %ymm0, %ymm0
-; KNL-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; KNL-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
+; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm5
+; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm5[1],ymm0[2],ymm5[3],ymm0[4],ymm5[5],ymm0[6],ymm5[7],ymm0[8],ymm5[9],ymm0[10],ymm5[11],ymm0[12],ymm5[13],ymm0[14],ymm5[15]
 ; KNL-NEXT:    vextracti128 $1, %ymm3, %xmm3
 ; KNL-NEXT:    vpbroadcastw %xmm3, %ymm3
 ; KNL-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]




More information about the llvm-commits mailing list