[llvm] r346482 - [X86] Fix VZEROUPPER scheduling info on SNB, HSW, BDW, SXL, SKX.

Clement Courbet via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 9 01:49:06 PST 2018


Author: courbet
Date: Fri Nov  9 01:49:06 2018
New Revision: 346482

URL: http://llvm.org/viewvc/llvm-project?rev=346482&view=rev
Log:
[X86] Fix VZEROUPPER scheduling info on SNB,HSW,BDW,SXL,SKX.

Summary:
Starting from SNB, VZEROUPPER is handled by the renamer and uses no proc resources.
After HSW, it also has zero latency.

This fixes PR35606.

To reproduce:
Uops:
  llvm-exegesis -mode=uops -opcode-name=VZEROUPPER
Latency:
  echo -e '#LLVM-EXEGESIS-DEFREG XMM0 1\n#LLVM-EXEGESIS-DEFREG XMM1 1\nvzeroupper' | /tmp/llvm-exegesis -mode=latency -snippets-file=-
  echo -e '#LLVM-EXEGESIS-DEFREG XMM0 1\n#LLVM-EXEGESIS-DEFREG XMM1 1\nvzeroupper\naddps %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=latency -snippets-file=-

Reviewers: RKSimon, craig.topper, andreadb

Subscribers: gbedwell, llvm-commits

Differential Revision: https://reviews.llvm.org/D54107

Modified:
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
    llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
    llvm/trunk/test/CodeGen/X86/fma-schedule.ll
    llvm/trunk/test/CodeGen/X86/fma4-schedule.ll
    llvm/trunk/test/CodeGen/X86/xop-schedule.ll
    llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
    llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-avx1.s
    llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
    llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
    llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
    llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Fri Nov  9 01:49:06 2018
@@ -878,10 +878,10 @@ def BWWriteResGroup45 : SchedWriteRes<[B
 }
 def: InstRW<[BWWriteResGroup45], (instrs FNCLEX)>;
 
-def BWWriteResGroup46 : SchedWriteRes<[BWPort015,BWPort0156]> {
-  let Latency = 4;
+def BWWriteResGroup46 : SchedWriteRes<[]> {
+  let Latency = 0;
   let NumMicroOps = 4;
-  let ResourceCycles = [1,3];
+  let ResourceCycles = [];
 }
 def: InstRW<[BWWriteResGroup46], (instrs VZEROUPPER)>;
 

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Fri Nov  9 01:49:06 2018
@@ -1408,10 +1408,10 @@ def HWWriteResGroup81 : SchedWriteRes<[H
 }
 def: InstRW<[HWWriteResGroup81], (instrs FNCLEX)>;
 
-def HWWriteResGroup82 : SchedWriteRes<[HWPort015,HWPort0156]> {
-  let Latency = 4;
+def HWWriteResGroup82 : SchedWriteRes<[]> {
+  let Latency = 0;
   let NumMicroOps = 4;
-  let ResourceCycles = [1,3];
+  let ResourceCycles = [];
 }
 def: InstRW<[HWWriteResGroup82], (instrs VZEROUPPER)>;
 

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Fri Nov  9 01:49:06 2018
@@ -1112,6 +1112,13 @@ def SBWriteResGroupVzeroall : SchedWrite
 }
 def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;
 
+def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
+  let Latency = 1;
+  let NumMicroOps = 4;
+  let ResourceCycles = [];
+}
+def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;
+
 def: InstRW<[WriteZero], (instrs CLC)>;
 
 // Intruction variants handled by the renamer. These might not need execution

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Fri Nov  9 01:49:06 2018
@@ -897,10 +897,10 @@ def SKLWriteResGroup55 : SchedWriteRes<[
 }
 def: InstRW<[SKLWriteResGroup55], (instrs PAUSE)>;
 
-def SKLWriteResGroup56 : SchedWriteRes<[SKLPort015,SKLPort0156]> {
-  let Latency = 4;
+def SKLWriteResGroup56 : SchedWriteRes<[]> {
+  let Latency = 0;
   let NumMicroOps = 4;
-  let ResourceCycles = [1,3];
+  let ResourceCycles = [];
 }
 def: InstRW<[SKLWriteResGroup56], (instrs VZEROUPPER)>;
 

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Fri Nov  9 01:49:06 2018
@@ -1009,10 +1009,10 @@ def SKXWriteResGroup55 : SchedWriteRes<[
 }
 def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;
 
-def SKXWriteResGroup56 : SchedWriteRes<[SKXPort015,SKXPort0156]> {
-  let Latency = 4;
+def SKXWriteResGroup56 : SchedWriteRes<[]> {
+  let Latency = 0;
   let NumMicroOps = 4;
-  let ResourceCycles = [1,3];
+  let ResourceCycles = [];
 }
 def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;
 

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Fri Nov  9 01:49:06 2018
@@ -1843,42 +1843,42 @@ define <4 x float> @test_extractf128(<8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_extractf128:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
+; SANDY-NEXT:    vzeroupper # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_extractf128:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_extractf128:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_extractf128:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_extractf128:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
 ; SKX-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BDVER2-LABEL: test_extractf128:
@@ -3008,37 +3008,37 @@ define i32 @test_movmskpd(<4 x double> %
 ; GENERIC-LABEL: test_movmskpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_movmskpd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
-; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
+; SANDY-NEXT:    vzeroupper # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movmskpd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovmskpd %ymm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_movmskpd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovmskpd %ymm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movmskpd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_movmskpd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BDVER2-LABEL: test_movmskpd:
@@ -3066,37 +3066,37 @@ define i32 @test_movmskps(<8 x float> %a
 ; GENERIC-LABEL: test_movmskps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_movmskps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
-; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
+; SANDY-NEXT:    vzeroupper # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movmskps:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_movmskps:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movmskps:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_movmskps:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BDVER2-LABEL: test_movmskps:
@@ -3126,7 +3126,7 @@ define void @test_movntdq(<4 x i64> %a0,
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_movntdq:
@@ -3134,7 +3134,7 @@ define void @test_movntdq(<4 x i64> %a0,
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
+; SANDY-NEXT:    vzeroupper # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movntdq:
@@ -3142,7 +3142,7 @@ define void @test_movntdq(<4 x i64> %a0,
 ; HASWELL-NEXT:    #APP
 ; HASWELL-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_movntdq:
@@ -3150,7 +3150,7 @@ define void @test_movntdq(<4 x i64> %a0,
 ; BROADWELL-NEXT:    #APP
 ; BROADWELL-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movntdq:
@@ -3158,7 +3158,7 @@ define void @test_movntdq(<4 x i64> %a0,
 ; SKYLAKE-NEXT:    #APP
 ; SKYLAKE-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_movntdq:
@@ -3166,7 +3166,7 @@ define void @test_movntdq(<4 x i64> %a0,
 ; SKX-NEXT:    #APP
 ; SKX-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BDVER2-LABEL: test_movntdq:
@@ -5240,7 +5240,7 @@ define i32 @test_testpd_ymm(<4 x double>
 ; GENERIC-NEXT:    setb %al # sched: [1:0.50]
 ; GENERIC-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_testpd_ymm:
@@ -5250,7 +5250,7 @@ define i32 @test_testpd_ymm(<4 x double>
 ; SANDY-NEXT:    setb %al # sched: [1:0.50]
 ; SANDY-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
-; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
+; SANDY-NEXT:    vzeroupper # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_testpd_ymm:
@@ -5260,7 +5260,7 @@ define i32 @test_testpd_ymm(<4 x double>
 ; HASWELL-NEXT:    setb %al # sched: [1:0.50]
 ; HASWELL-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    adcl $0, %eax # sched: [2:0.50]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_testpd_ymm:
@@ -5270,7 +5270,7 @@ define i32 @test_testpd_ymm(<4 x double>
 ; BROADWELL-NEXT:    setb %al # sched: [1:0.50]
 ; BROADWELL-NEXT:    vtestpd (%rdi), %ymm0 # sched: [7:1.00]
 ; BROADWELL-NEXT:    adcl $0, %eax # sched: [1:0.50]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_testpd_ymm:
@@ -5280,7 +5280,7 @@ define i32 @test_testpd_ymm(<4 x double>
 ; SKYLAKE-NEXT:    setb %al # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vtestpd (%rdi), %ymm0 # sched: [9:1.00]
 ; SKYLAKE-NEXT:    adcl $0, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_testpd_ymm:
@@ -5290,7 +5290,7 @@ define i32 @test_testpd_ymm(<4 x double>
 ; SKX-NEXT:    setb %al # sched: [1:0.50]
 ; SKX-NEXT:    vtestpd (%rdi), %ymm0 # sched: [9:1.00]
 ; SKX-NEXT:    adcl $0, %eax # sched: [1:0.50]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BDVER2-LABEL: test_testpd_ymm:
@@ -5426,7 +5426,7 @@ define i32 @test_testps_ymm(<8 x float>
 ; GENERIC-NEXT:    setb %al # sched: [1:0.50]
 ; GENERIC-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_testps_ymm:
@@ -5436,7 +5436,7 @@ define i32 @test_testps_ymm(<8 x float>
 ; SANDY-NEXT:    setb %al # sched: [1:0.50]
 ; SANDY-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
-; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
+; SANDY-NEXT:    vzeroupper # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_testps_ymm:
@@ -5446,7 +5446,7 @@ define i32 @test_testps_ymm(<8 x float>
 ; HASWELL-NEXT:    setb %al # sched: [1:0.50]
 ; HASWELL-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
 ; HASWELL-NEXT:    adcl $0, %eax # sched: [2:0.50]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_testps_ymm:
@@ -5456,7 +5456,7 @@ define i32 @test_testps_ymm(<8 x float>
 ; BROADWELL-NEXT:    setb %al # sched: [1:0.50]
 ; BROADWELL-NEXT:    vtestps (%rdi), %ymm0 # sched: [7:1.00]
 ; BROADWELL-NEXT:    adcl $0, %eax # sched: [1:0.50]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_testps_ymm:
@@ -5466,7 +5466,7 @@ define i32 @test_testps_ymm(<8 x float>
 ; SKYLAKE-NEXT:    setb %al # sched: [1:0.50]
 ; SKYLAKE-NEXT:    vtestps (%rdi), %ymm0 # sched: [9:1.00]
 ; SKYLAKE-NEXT:    adcl $0, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_testps_ymm:
@@ -5476,7 +5476,7 @@ define i32 @test_testps_ymm(<8 x float>
 ; SKX-NEXT:    setb %al # sched: [1:0.50]
 ; SKX-NEXT:    vtestps (%rdi), %ymm0 # sched: [9:1.00]
 ; SKX-NEXT:    adcl $0, %eax # sched: [1:0.50]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BDVER2-LABEL: test_testps_ymm:
@@ -5976,32 +5976,32 @@ declare void @llvm.x86.avx.vzeroall() no
 define void @test_zeroupper() {
 ; GENERIC-LABEL: test_zeroupper:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_zeroupper:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
+; SANDY-NEXT:    vzeroupper # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_zeroupper:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_zeroupper:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_zeroupper:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_zeroupper:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BDVER2-LABEL: test_zeroupper:

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Fri Nov  9 01:49:06 2018
@@ -177,7 +177,7 @@ define <4 x i32> @test_extracti128(<8 x
 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
 ; GENERIC-NEXT:    vextracti128 $1, %ymm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_extracti128:
@@ -185,7 +185,7 @@ define <4 x i32> @test_extracti128(<8 x
 ; HASWELL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
 ; HASWELL-NEXT:    vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT:    vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_extracti128:
@@ -193,7 +193,7 @@ define <4 x i32> @test_extracti128(<8 x
 ; BROADWELL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
 ; BROADWELL-NEXT:    vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_extracti128:
@@ -201,7 +201,7 @@ define <4 x i32> @test_extracti128(<8 x
 ; SKYLAKE-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
 ; SKYLAKE-NEXT:    vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_extracti128:
@@ -209,7 +209,7 @@ define <4 x i32> @test_extracti128(<8 x
 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
 ; SKX-NEXT:    vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
 ; SKX-NEXT:    vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_extracti128:
@@ -474,31 +474,31 @@ define <4 x float> @test_gatherqps_ymm(<
 ; GENERIC-LABEL: test_gatherqps_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherqps_ymm:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_gatherqps_ymm:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_gatherqps_ymm:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_gatherqps_ymm:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_gatherqps_ymm:
@@ -2920,31 +2920,31 @@ define <4 x i32> @test_pgatherqd_ymm(<4
 ; GENERIC-LABEL: test_pgatherqd_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherqd_ymm:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pgatherqd_ymm:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pgatherqd_ymm:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pgatherqd_ymm:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pgatherqd_ymm:
@@ -4089,31 +4089,31 @@ define i32 @test_pmovmskb(<32 x i8> %a0)
 ; GENERIC-LABEL: test_pmovmskb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovmskb %ymm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovmskb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vpmovmskb %ymm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmovmskb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vpmovmskb %ymm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmovmskb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vpmovmskb %ymm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmovmskb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovmskb %ymm0, %eax # sched: [2:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pmovmskb:

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Fri Nov  9 01:49:06 2018
@@ -1447,13 +1447,13 @@ define <4 x float> @slto4f32(<4 x i64> %
 ; GENERIC-LABEL: slto4f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: slto4f32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = sitofp <4 x i64> %a to <4 x float>
   ret <4 x float> %b
@@ -1463,13 +1463,13 @@ define <4 x float> @ulto4f32(<4 x i64> %
 ; GENERIC-LABEL: ulto4f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ulto4f32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = uitofp <4 x i64> %a to <4 x float>
   ret <4 x float> %b
@@ -1538,14 +1538,14 @@ define <16 x i8> @f32to16uc(<16 x float>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovdb %zmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f32to16uc:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
 ; SKX-NEXT:    vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %res = fptoui <16 x float> %f to <16 x i8>
   ret <16 x i8> %res
@@ -1614,14 +1614,14 @@ define <8 x i16> @f64to8us(<8 x double>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f64to8us:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
 ; SKX-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %res = fptoui <8 x double> %f to <8 x i16>
   ret <8 x i16> %res
@@ -1632,14 +1632,14 @@ define <8 x i8> @f64to8uc(<8 x double> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f64to8uc:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
 ; SKX-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %res = fptoui <8 x double> %f to <8 x i8>
   ret <8 x i8> %res
@@ -1649,13 +1649,13 @@ define <4 x i32> @f64to4ui(<4 x double>
 ; GENERIC-LABEL: f64to4ui:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f64to4ui:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = fptoui <4 x double> %a to <4 x i32>
   ret <4 x i32> %b
@@ -1737,13 +1737,13 @@ define <4 x i32> @f64to4si(<4 x double>
 ; GENERIC-LABEL: f64to4si:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f64to4si:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = fptosi <4 x double> %a to <4 x i32>
   ret <4 x i32> %b
@@ -1771,13 +1771,13 @@ define <4 x float> @f64to4f32(<4 x doubl
 ; GENERIC-LABEL: f64to4f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f64to4f32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a = fptrunc <4 x double> %b to <4 x float>
   ret <4 x float> %a
@@ -1789,7 +1789,7 @@ define <4 x float> @f64to4f32_mask(<4 x
 ; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: f64to4f32_mask:
@@ -1797,7 +1797,7 @@ define <4 x float> @f64to4f32_mask(<4 x
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
 ; SKX-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a = fptrunc <4 x double> %b to <4 x float>
   %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
@@ -4307,7 +4307,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
 ; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: trunc_16i32_to_16i1:
@@ -4316,7 +4316,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
 ; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %mask_b = trunc <16 x i32>%a to <16 x i1>
   %mask = bitcast <16 x i1> %mask_b to i16
@@ -4422,14 +4422,14 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
 ; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8i1_8i16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x = icmp slt <8 x i32> %a1, %a2
   %y = sext <8 x i1> %x to <8 x i16>
@@ -4475,14 +4475,14 @@ define void @extload_v8i64(<8 x i8>* %a,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: extload_v8i64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
 ; SKX-NEXT:    vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %sign_load = load <8 x i8>, <8 x i8>* %a
   %c = sext <8 x i8> %sign_load to <8 x i64>
@@ -6124,13 +6124,13 @@ define void @mov_test18(i8 * %addr, <8 x
 ; GENERIC-LABEL: mov_test18:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test18:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %vaddr = bitcast i8* %addr to <8 x i64>*
   store <8 x i64>%data, <8 x i64>* %vaddr, align 64
@@ -6141,13 +6141,13 @@ define void @mov_test19(i8 * %addr, <16
 ; GENERIC-LABEL: mov_test19:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test19:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %vaddr = bitcast i8* %addr to <16 x i32>*
   store <16 x i32>%data, <16 x i32>* %vaddr, align 1
@@ -6158,13 +6158,13 @@ define void @mov_test20(i8 * %addr, <16
 ; GENERIC-LABEL: mov_test20:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test20:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %vaddr = bitcast i8* %addr to <16 x i32>*
   store <16 x i32>%data, <16 x i32>* %vaddr, align 64
@@ -6190,13 +6190,13 @@ define void @mov_test22(i8 * %addr, <8 x
 ; GENERIC-LABEL: mov_test22:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test22:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %vaddr = bitcast i8* %addr to <8 x i64>*
   store <8 x i64>%data, <8 x i64>* %vaddr, align 1
@@ -6222,13 +6222,13 @@ define void @mov_test24(i8 * %addr, <8 x
 ; GENERIC-LABEL: mov_test24:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test24:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %vaddr = bitcast i8* %addr to <8 x double>*
   store <8 x double>%data, <8 x double>* %vaddr, align 64
@@ -6254,13 +6254,13 @@ define void @mov_test26(i8 * %addr, <16
 ; GENERIC-LABEL: mov_test26:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test26:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %vaddr = bitcast i8* %addr to <16 x float>*
   store <16 x float>%data, <16 x float>* %vaddr, align 64
@@ -6286,13 +6286,13 @@ define void @mov_test28(i8 * %addr, <8 x
 ; GENERIC-LABEL: mov_test28:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test28:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %vaddr = bitcast i8* %addr to <8 x double>*
   store <8 x double>%data, <8 x double>* %vaddr, align 1
@@ -6318,13 +6318,13 @@ define void @mov_test30(i8 * %addr, <16
 ; GENERIC-LABEL: mov_test30:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test30:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %vaddr = bitcast i8* %addr to <16 x float>*
   store <16 x float>%data, <16 x float>* %vaddr, align 1
@@ -6877,7 +6877,7 @@ define i32 @zext_test1(<16 x i32> %a, <1
 ; GENERIC-NEXT:    kshiftrw $5, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    andl $1, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_test1:
@@ -6886,7 +6886,7 @@ define i32 @zext_test1(<16 x i32> %a, <1
 ; SKX-NEXT:    kshiftrw $5, %k0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    andl $1, %eax # sched: [1:0.25]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmp_res = icmp ugt <16 x i32> %a, %b
   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
@@ -6902,7 +6902,7 @@ define i16 @zext_test2(<16 x i32> %a, <1
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    andl $1, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_test2:
@@ -6912,7 +6912,7 @@ define i16 @zext_test2(<16 x i32> %a, <1
 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    andl $1, %eax # sched: [1:0.25]
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmp_res = icmp ugt <16 x i32> %a, %b
   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
@@ -6928,7 +6928,7 @@ define i8 @zext_test3(<16 x i32> %a, <16
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    andb $1, %al # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_test3:
@@ -6938,7 +6938,7 @@ define i8 @zext_test3(<16 x i32> %a, <16
 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    andb $1, %al # sched: [1:0.25]
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmp_res = icmp ugt <16 x i32> %a, %b
   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
@@ -6976,7 +6976,7 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ; GENERIC-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50]
 ; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50]
 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test4:
@@ -6984,7 +6984,7 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
 ; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x_gt_y = icmp sgt <4 x i64> %x, %y
   %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
@@ -7061,13 +7061,13 @@ define <16 x i8> @vcmp_test8(<16 x i32>%
 ; GENERIC-NEXT:  # %bb.2:
 ; GENERIC-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB386_1:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
 ; GENERIC-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vcmp_test8:
@@ -7077,13 +7077,13 @@ define <16 x i8> @vcmp_test8(<16 x i32>%
 ; SKX-NEXT:  # %bb.2:
 ; SKX-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ; SKX-NEXT:  .LBB386_1:
 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cond = icmp sgt i32 %a1, %b1
   %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
@@ -7617,11 +7617,11 @@ define void @ktest_1(<8 x double> %in, d
 ; GENERIC-NEXT:    je .LBB410_2 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.1: # %L1
 ; GENERIC-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB410_2: # %L2
 ; GENERIC-NEXT:    vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ktest_1:
@@ -7634,11 +7634,11 @@ define void @ktest_1(<8 x double> %in, d
 ; SKX-NEXT:    je .LBB410_2 # sched: [1:0.50]
 ; SKX-NEXT:  # %bb.1: # %L1
 ; SKX-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ; SKX-NEXT:  .LBB410_2: # %L2
 ; SKX-NEXT:    vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %addr1 = getelementptr double, double * %base, i64 0
   %addr2 = getelementptr double, double * %base, i64 1
@@ -7686,12 +7686,12 @@ define void @ktest_2(<32 x float> %in, f
 ; GENERIC-NEXT:  # %bb.1: # %L1
 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB411_2: # %L2
 ; GENERIC-NEXT:    vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ktest_2:
@@ -7711,12 +7711,12 @@ define void @ktest_2(<32 x float> %in, f
 ; SKX-NEXT:  # %bb.1: # %L1
 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ; SKX-NEXT:  .LBB411_2: # %L2
 ; SKX-NEXT:    vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %addr1 = getelementptr float, float * %base, i64 0
   %addr2 = getelementptr float, float * %base, i64 1
@@ -7908,7 +7908,7 @@ define void @store_32i1(<32 x i1>* %a, <
 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %ymm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: store_32i1:
@@ -7916,7 +7916,7 @@ define void @store_32i1(<32 x i1>* %a, <
 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
 ; SKX-NEXT:    vpmovb2m %ymm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   store <32 x i1> %v, <32 x i1>* %a
   ret void
@@ -7928,7 +7928,7 @@ define void @store_32i1_1(<32 x i1>* %a,
 ; GENERIC-NEXT:    vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: store_32i1_1:
@@ -7936,7 +7936,7 @@ define void @store_32i1_1(<32 x i1>* %a,
 ; SKX-NEXT:    vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovw2m %zmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %v1 = trunc <32 x i16> %v to <32 x i1>
   store <32 x i1> %v1, <32 x i1>* %a
@@ -7951,7 +7951,7 @@ define void @store_64i1(<64 x i1>* %a, <
 ; GENERIC-NEXT:    vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovq %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: store_64i1:
@@ -7959,7 +7959,7 @@ define void @store_64i1(<64 x i1>* %a, <
 ; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovq %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   store <64 x i1> %v, <64 x i1>* %a
   ret void
@@ -7971,7 +7971,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_bitcast_v8i1_zext:
@@ -7979,7 +7979,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x
 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    kmovb %k0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    addl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
    %v1 = icmp eq <16 x i32> %a, zeroinitializer
    %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -7995,7 +7995,7 @@ define i32 @test_bitcast_v16i1_zext(<16
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovw %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_bitcast_v16i1_zext:
@@ -8003,7 +8003,7 @@ define i32 @test_bitcast_v16i1_zext(<16
 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    kmovw %k0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    addl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
    %v1 = icmp eq <16 x i32> %a, zeroinitializer
    %mask1 = bitcast <16 x i1> %v1 to i16

Modified: llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll Fri Nov  9 01:49:06 2018
@@ -18,7 +18,7 @@ define void @test_vpopcntd(<16 x i32> %a
 ; GENERIC-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50]
 ; GENERIC-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ICELAKE-LABEL: test_vpopcntd:
@@ -35,7 +35,7 @@ define void @test_vpopcntd(<16 x i32> %a
 ; ICELAKE-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00]
 ; ICELAKE-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; ICELAKE-NEXT:    #NO_APP
-; ICELAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; ICELAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; ICELAKE-NEXT:    retq # sched: [7:1.00]
   tail call void asm "vpopcntd $1, $0 \0A\09 vpopcntd $1, $0 {$3} \0A\09 vpopcntd $1, $0 {$3} {z} \0A\09 vpopcntd $2, $0 \0A\09 vpopcntd $2, $0 {$3} \0A\09 vpopcntd $2, $0 {$3} {z} \0A\09 vpopcntd $2{1to16}, $0 \0A\09 vpopcntd $2{1to16}, $0 {$3} \0A\09 vpopcntd $2{1to16}, $0 {$3} {z}", "v,v,*m,^Yk"(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 %a3) nounwind
   ret void
@@ -56,7 +56,7 @@ define void @test_vpopcntq(<8 x i64> %a0
 ; GENERIC-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50]
 ; GENERIC-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ICELAKE-LABEL: test_vpopcntq:
@@ -73,7 +73,7 @@ define void @test_vpopcntq(<8 x i64> %a0
 ; ICELAKE-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00]
 ; ICELAKE-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; ICELAKE-NEXT:    #NO_APP
-; ICELAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; ICELAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; ICELAKE-NEXT:    retq # sched: [7:1.00]
   tail call void asm "vpopcntq $1, $0 \0A\09 vpopcntq $1, $0 {$3} \0A\09 vpopcntq $1, $0 {$3} {z} \0A\09 vpopcntq $2, $0 \0A\09 vpopcntq $2, $0 {$3} \0A\09 vpopcntq $2, $0 {$3} {z} \0A\09 vpopcntq $2{1to8}, $0 \0A\09 vpopcntq $2{1to8}, $0 {$3} \0A\09 vpopcntq $2{1to8}, $0 {$3} {z}", "v,v,*m,^Yk"(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) nounwind
   ret void

Modified: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-schedule.ll?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll Fri Nov  9 01:49:06 2018
@@ -197,35 +197,35 @@ define <8 x i16> @test_vcvtps2ph_256(<8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtps2ph_256:
 ; IVY:       # %bb.0:
 ; IVY-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
 ; IVY-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
-; IVY-NEXT:    vzeroupper # sched: [100:0.33]
+; IVY-NEXT:    vzeroupper # sched: [1:1.00]
 ; IVY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_vcvtps2ph_256:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vcvtps2ph_256:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vcvtps2ph_256:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00]
 ; SKYLAKE-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BDVER2-LABEL: test_vcvtps2ph_256:

Modified: llvm/trunk/test/CodeGen/X86/fma-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-schedule.ll?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-schedule.ll Fri Nov  9 01:49:06 2018
@@ -123,7 +123,7 @@ define void @test_vfmaddpd_256(<4 x doub
 ; GENERIC-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfmaddpd_256:
@@ -149,7 +149,7 @@ define void @test_vfmaddpd_256(<4 x doub
 ; HASWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfmaddpd_256:
@@ -162,7 +162,7 @@ define void @test_vfmaddpd_256(<4 x doub
 ; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfmaddpd_256:
@@ -175,7 +175,7 @@ define void @test_vfmaddpd_256(<4 x doub
 ; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfmaddpd_256:
@@ -200,7 +200,7 @@ define void @test_vfmaddpd_256(<4 x doub
 ; SKX-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
 ; SKX-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfmaddpd_256:
@@ -330,7 +330,7 @@ define void @test_vfmaddps_256(<8 x floa
 ; GENERIC-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfmaddps_256:
@@ -356,7 +356,7 @@ define void @test_vfmaddps_256(<8 x floa
 ; HASWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfmaddps_256:
@@ -369,7 +369,7 @@ define void @test_vfmaddps_256(<8 x floa
 ; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfmaddps_256:
@@ -382,7 +382,7 @@ define void @test_vfmaddps_256(<8 x floa
 ; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfmaddps_256:
@@ -407,7 +407,7 @@ define void @test_vfmaddps_256(<8 x floa
 ; SKX-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
 ; SKX-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfmaddps_256:
@@ -741,7 +741,7 @@ define void @test_vfmaddsubpd_256(<4 x d
 ; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfmaddsubpd_256:
@@ -767,7 +767,7 @@ define void @test_vfmaddsubpd_256(<4 x d
 ; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfmaddsubpd_256:
@@ -780,7 +780,7 @@ define void @test_vfmaddsubpd_256(<4 x d
 ; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfmaddsubpd_256:
@@ -793,7 +793,7 @@ define void @test_vfmaddsubpd_256(<4 x d
 ; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfmaddsubpd_256:
@@ -818,7 +818,7 @@ define void @test_vfmaddsubpd_256(<4 x d
 ; SKX-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
 ; SKX-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfmaddsubpd_256:
@@ -948,7 +948,7 @@ define void @test_vfmaddsubps_256(<8 x f
 ; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfmaddsubps_256:
@@ -974,7 +974,7 @@ define void @test_vfmaddsubps_256(<8 x f
 ; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfmaddsubps_256:
@@ -987,7 +987,7 @@ define void @test_vfmaddsubps_256(<8 x f
 ; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfmaddsubps_256:
@@ -1000,7 +1000,7 @@ define void @test_vfmaddsubps_256(<8 x f
 ; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfmaddsubps_256:
@@ -1025,7 +1025,7 @@ define void @test_vfmaddsubps_256(<8 x f
 ; SKX-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
 ; SKX-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfmaddsubps_256:
@@ -1159,7 +1159,7 @@ define void @test_vfmsubaddpd_256(<4 x d
 ; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfmsubaddpd_256:
@@ -1185,7 +1185,7 @@ define void @test_vfmsubaddpd_256(<4 x d
 ; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfmsubaddpd_256:
@@ -1198,7 +1198,7 @@ define void @test_vfmsubaddpd_256(<4 x d
 ; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfmsubaddpd_256:
@@ -1211,7 +1211,7 @@ define void @test_vfmsubaddpd_256(<4 x d
 ; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfmsubaddpd_256:
@@ -1236,7 +1236,7 @@ define void @test_vfmsubaddpd_256(<4 x d
 ; SKX-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
 ; SKX-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfmsubaddpd_256:
@@ -1366,7 +1366,7 @@ define void @test_vfmsubaddps_256(<8 x f
 ; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfmsubaddps_256:
@@ -1392,7 +1392,7 @@ define void @test_vfmsubaddps_256(<8 x f
 ; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfmsubaddps_256:
@@ -1405,7 +1405,7 @@ define void @test_vfmsubaddps_256(<8 x f
 ; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfmsubaddps_256:
@@ -1418,7 +1418,7 @@ define void @test_vfmsubaddps_256(<8 x f
 ; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfmsubaddps_256:
@@ -1443,7 +1443,7 @@ define void @test_vfmsubaddps_256(<8 x f
 ; SKX-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
 ; SKX-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfmsubaddps_256:
@@ -1577,7 +1577,7 @@ define void @test_vfmsubpd_256(<4 x doub
 ; GENERIC-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfmsubpd_256:
@@ -1603,7 +1603,7 @@ define void @test_vfmsubpd_256(<4 x doub
 ; HASWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfmsubpd_256:
@@ -1616,7 +1616,7 @@ define void @test_vfmsubpd_256(<4 x doub
 ; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfmsubpd_256:
@@ -1629,7 +1629,7 @@ define void @test_vfmsubpd_256(<4 x doub
 ; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfmsubpd_256:
@@ -1654,7 +1654,7 @@ define void @test_vfmsubpd_256(<4 x doub
 ; SKX-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
 ; SKX-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfmsubpd_256:
@@ -1784,7 +1784,7 @@ define void @test_vfmsubps_256(<8 x floa
 ; GENERIC-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfmsubps_256:
@@ -1810,7 +1810,7 @@ define void @test_vfmsubps_256(<8 x floa
 ; HASWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfmsubps_256:
@@ -1823,7 +1823,7 @@ define void @test_vfmsubps_256(<8 x floa
 ; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfmsubps_256:
@@ -1836,7 +1836,7 @@ define void @test_vfmsubps_256(<8 x floa
 ; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfmsubps_256:
@@ -1861,7 +1861,7 @@ define void @test_vfmsubps_256(<8 x floa
 ; SKX-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
 ; SKX-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfmsubps_256:
@@ -2195,7 +2195,7 @@ define void @test_vfnmaddpd_256(<4 x dou
 ; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfnmaddpd_256:
@@ -2221,7 +2221,7 @@ define void @test_vfnmaddpd_256(<4 x dou
 ; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfnmaddpd_256:
@@ -2234,7 +2234,7 @@ define void @test_vfnmaddpd_256(<4 x dou
 ; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfnmaddpd_256:
@@ -2247,7 +2247,7 @@ define void @test_vfnmaddpd_256(<4 x dou
 ; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfnmaddpd_256:
@@ -2272,7 +2272,7 @@ define void @test_vfnmaddpd_256(<4 x dou
 ; SKX-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
 ; SKX-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfnmaddpd_256:
@@ -2402,7 +2402,7 @@ define void @test_vfnmaddps_256(<8 x flo
 ; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfnmaddps_256:
@@ -2428,7 +2428,7 @@ define void @test_vfnmaddps_256(<8 x flo
 ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfnmaddps_256:
@@ -2441,7 +2441,7 @@ define void @test_vfnmaddps_256(<8 x flo
 ; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfnmaddps_256:
@@ -2454,7 +2454,7 @@ define void @test_vfnmaddps_256(<8 x flo
 ; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfnmaddps_256:
@@ -2479,7 +2479,7 @@ define void @test_vfnmaddps_256(<8 x flo
 ; SKX-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
 ; SKX-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfnmaddps_256:
@@ -2813,7 +2813,7 @@ define void @test_vfnmsubpd_256(<4 x dou
 ; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfnmsubpd_256:
@@ -2839,7 +2839,7 @@ define void @test_vfnmsubpd_256(<4 x dou
 ; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfnmsubpd_256:
@@ -2852,7 +2852,7 @@ define void @test_vfnmsubpd_256(<4 x dou
 ; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfnmsubpd_256:
@@ -2865,7 +2865,7 @@ define void @test_vfnmsubpd_256(<4 x dou
 ; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfnmsubpd_256:
@@ -2890,7 +2890,7 @@ define void @test_vfnmsubpd_256(<4 x dou
 ; SKX-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
 ; SKX-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfnmsubpd_256:
@@ -3020,7 +3020,7 @@ define void @test_vfnmsubps_256(<8 x flo
 ; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
 ; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER2-LABEL: test_vfnmsubps_256:
@@ -3046,7 +3046,7 @@ define void @test_vfnmsubps_256(<8 x flo
 ; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
 ; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; HASWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_vfnmsubps_256:
@@ -3059,7 +3059,7 @@ define void @test_vfnmsubps_256(<8 x flo
 ; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
 ; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
+; BROADWELL-NEXT:    vzeroupper # sched: [0:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vfnmsubps_256:
@@ -3072,7 +3072,7 @@ define void @test_vfnmsubps_256(<8 x flo
 ; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
 ; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
+; SKYLAKE-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_vfnmsubps_256:
@@ -3097,7 +3097,7 @@ define void @test_vfnmsubps_256(<8 x flo
 ; SKX-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
 ; SKX-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    vzeroupper # sched: [4:1.00]
+; SKX-NEXT:    vzeroupper # sched: [0:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_vfnmsubps_256:

Modified: llvm/trunk/test/CodeGen/X86/fma4-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-schedule.ll?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma4-schedule.ll Fri Nov  9 01:49:06 2018
@@ -48,7 +48,7 @@ define void @test_vfmaddpd_256(<4 x doub
 ; GENERIC-NEXT:    vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfmaddpd_256:
@@ -113,7 +113,7 @@ define void @test_vfmaddps_256(<8 x floa
 ; GENERIC-NEXT:    vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfmaddps_256:
@@ -244,7 +244,7 @@ define void @test_vfmaddsubpd_256(<4 x d
 ; GENERIC-NEXT:    vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfmaddsubpd_256:
@@ -309,7 +309,7 @@ define void @test_vfmaddsubps_256(<8 x f
 ; GENERIC-NEXT:    vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfmaddsubps_256:
@@ -378,7 +378,7 @@ define void @test_vfmsubaddpd_256(<4 x d
 ; GENERIC-NEXT:    vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfmsubaddpd_256:
@@ -443,7 +443,7 @@ define void @test_vfmsubaddps_256(<8 x f
 ; GENERIC-NEXT:    vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfmsubaddps_256:
@@ -512,7 +512,7 @@ define void @test_vfmsubpd_256(<4 x doub
 ; GENERIC-NEXT:    vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfmsubpd_256:
@@ -577,7 +577,7 @@ define void @test_vfmsubps_256(<8 x floa
 ; GENERIC-NEXT:    vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfmsubps_256:
@@ -708,7 +708,7 @@ define void @test_vfnmaddpd_256(<4 x dou
 ; GENERIC-NEXT:    vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfnmaddpd_256:
@@ -773,7 +773,7 @@ define void @test_vfnmaddps_256(<8 x flo
 ; GENERIC-NEXT:    vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfnmaddps_256:
@@ -904,7 +904,7 @@ define void @test_vfnmsubpd_256(<4 x dou
 ; GENERIC-NEXT:    vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfnmsubpd_256:
@@ -969,7 +969,7 @@ define void @test_vfnmsubps_256(<8 x flo
 ; GENERIC-NEXT:    vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfnmsubps_256:

Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Fri Nov  9 01:49:06 2018
@@ -14,7 +14,7 @@ define void @test_vfrczpd(<2 x double> %
 ; GENERIC-NEXT:    vfrczpd (%rdi), %xmm0 # sched: [9:1.00]
 ; GENERIC-NEXT:    vfrczpd (%rsi), %ymm1 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfrczpd:
@@ -62,7 +62,7 @@ define void @test_vfrczps(<4 x float> %a
 ; GENERIC-NEXT:    vfrczps (%rdi), %xmm0 # sched: [9:1.00]
 ; GENERIC-NEXT:    vfrczps (%rsi), %ymm1 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vfrczps:
@@ -221,7 +221,7 @@ define void @test_vpcmov_256(<4 x i64> %
 ; GENERIC-NEXT:    vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vpcmov_256:
@@ -425,7 +425,7 @@ define void @test_vpermil2pd_256(<4 x do
 ; GENERIC-NEXT:    vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vpermil2pd_256:
@@ -509,7 +509,7 @@ define void @test_vpermil2ps_256(<8 x fl
 ; GENERIC-NEXT:    vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
+; GENERIC-NEXT:    vzeroupper # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER12-LABEL: test_vpermil2ps_256:

Modified: llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s Fri Nov  9 01:49:06 2018
@@ -1720,7 +1720,7 @@ vzeroupper
 # CHECK-NEXT:  1      1     1.00                        vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      7     1.00    *                   vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  20     8     1.00    *      *      U     vzeroall
-# CHECK-NEXT:  4      4     1.00    *      *      U     vzeroupper
+# CHECK-NEXT:  4      0     1.00    *      *      U     vzeroupper
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - BWDivider
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     257.00 216.33 236.33 176.17 176.17 38.00  427.33 3.00   12.67
+# CHECK-NEXT:  -     257.00 215.25 235.25 176.17 176.17 38.00  426.25 2.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2430,4 +2430,4 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00   1.00    -     vzeroall
-# CHECK-NEXT:  -      -     1.08   1.08    -      -      -     1.08   0.75    -     vzeroupper
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vzeroupper

Modified: llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-avx1.s?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-avx1.s Fri Nov  9 01:49:06 2018
@@ -1720,7 +1720,7 @@ vzeroupper
 # CHECK-NEXT:  1      1     1.00                        vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     1.00    *                   vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  20     9     2.00    *      *      U     vzeroall
-# CHECK-NEXT:  1      100   0.33    *      *      U     vzeroupper
+# CHECK-NEXT:  4      1     1.00    *      *      U     vzeroupper
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SBDivider
@@ -1734,7 +1734,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     572.00 246.83 317.33 39.00  365.83 179.50 179.50
+# CHECK-NEXT:  -     572.00 246.50 317.00 39.00  365.50 179.50 179.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -2428,4 +2428,4 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -     2.00    -      -     vzeroall
-# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     vzeroupper
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     vzeroupper

Modified: llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-avx1.s?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-avx1.s Fri Nov  9 01:49:06 2018
@@ -1720,7 +1720,7 @@ vzeroupper
 # CHECK-NEXT:  1      1     1.00                        vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     1.00    *                   vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  20     8     1.00    *      *      U     vzeroall
-# CHECK-NEXT:  4      4     1.00    *      *      U     vzeroupper
+# CHECK-NEXT:  4      0     1.00    *      *      U     vzeroupper
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - HWDivider
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     336.00 215.67 237.67 176.17 176.17 38.00  430.67 3.00   12.67
+# CHECK-NEXT:  -     336.00 214.58 236.58 176.17 176.17 38.00  429.58 2.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2430,4 +2430,4 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00   1.00    -     vzeroall
-# CHECK-NEXT:  -      -     1.08   1.08    -      -      -     1.08   0.75    -     vzeroupper
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vzeroupper

Modified: llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s Fri Nov  9 01:49:06 2018
@@ -1720,7 +1720,7 @@ vzeroupper
 # CHECK-NEXT:  1      1     1.00                        vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     1.00    *                   vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  20     9     2.00    *      *      U     vzeroall
-# CHECK-NEXT:  1      100   0.33    *      *      U     vzeroupper
+# CHECK-NEXT:  4      1     1.00    *      *      U     vzeroupper
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SBDivider
@@ -1734,7 +1734,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     572.00 246.83 317.33 39.00  365.83 179.50 179.50
+# CHECK-NEXT:  -     572.00 246.50 317.00 39.00  365.50 179.50 179.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -2428,4 +2428,4 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -     2.00    -      -     vzeroall
-# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     vzeroupper
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     vzeroupper

Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s Fri Nov  9 01:49:06 2018
@@ -1720,7 +1720,7 @@ vzeroupper
 # CHECK-NEXT:  1      1     0.33                        vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  16     16    4.00    *      *      U     vzeroall
-# CHECK-NEXT:  4      4     1.00    *      *      U     vzeroupper
+# CHECK-NEXT:  4      0     0.67    *      *      U     vzeroupper
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SKLDivider
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     123.00 333.17 204.17 173.17 173.17 34.00  324.67 6.00   12.67
+# CHECK-NEXT:  -     123.00 332.08 203.08 173.17 173.17 34.00  323.58 5.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2430,4 +2430,4 @@ vzeroupper
 # CHECK-NEXT:  -      -     0.33   0.33    -      -      -     0.33    -      -     vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     4.00   4.00    -      -      -     4.00   4.00    -     vzeroall
-# CHECK-NEXT:  -      -     1.08   1.08    -      -      -     1.08   0.75    -     vzeroupper
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vzeroupper

Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s?rev=346482&r1=346481&r2=346482&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s Fri Nov  9 01:49:06 2018
@@ -1720,7 +1720,7 @@ vzeroupper
 # CHECK-NEXT:  1      1     0.33                        vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  34     12    5.00    *      *      U     vzeroall
-# CHECK-NEXT:  4      4     1.00    *      *      U     vzeroupper
+# CHECK-NEXT:  4      0     0.67    *      *      U     vzeroupper
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SKXDivider
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     123.00 318.50 197.50 173.17 173.17 34.00  339.00 7.00   12.67
+# CHECK-NEXT:  -     123.00 317.42 196.42 173.17 173.17 34.00  337.92 6.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2430,4 +2430,4 @@ vzeroupper
 # CHECK-NEXT:  -      -     0.33   0.33    -      -      -     0.33    -      -     vxorps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vxorps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     2.00   5.00    -     vzeroall
-# CHECK-NEXT:  -      -     1.08   1.08    -      -      -     1.08   0.75    -     vzeroupper
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vzeroupper




More information about the llvm-commits mailing list