[llvm] r329388 - [X86][SandyBridge] SBWriteResPair +5cy Memory Folds

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 6 04:00:51 PDT 2018


Author: rksimon
Date: Fri Apr  6 04:00:51 2018
New Revision: 329388

URL: http://llvm.org/viewvc/llvm-project?rev=329388&view=rev
Log:
[X86][SandyBridge] SBWriteResPair +5cy Memory Folds

As mentioned on D44647, this patch increases the default memory latency to +5cy , which more closely matches what most custom cases are doing for reg-mem instructions.

I've bumped LoadLatency, ReadAfterLd and WriteLoad values to 5cy to be consistent.

As Sandy Bridge is currently our default generic model, this affects a lot of scheduling tests...

Differential Revision: https://reviews.llvm.org/D44654

Modified:
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
    llvm/trunk/test/CodeGen/X86/adx-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
    llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
    llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll
    llvm/trunk/test/CodeGen/X86/clwb-schedule.ll
    llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
    llvm/trunk/test/CodeGen/X86/fma-schedule.ll
    llvm/trunk/test/CodeGen/X86/fma4-schedule.ll
    llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
    llvm/trunk/test/CodeGen/X86/movbe-schedule.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
    llvm/trunk/test/CodeGen/X86/sha-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
    llvm/trunk/test/CodeGen/X86/tbm-schedule.ll
    llvm/trunk/test/CodeGen/X86/x87-schedule.ll
    llvm/trunk/test/CodeGen/X86/xop-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Fri Apr  6 04:00:51 2018
@@ -18,7 +18,7 @@ def SandyBridgeModel : SchedMachineModel
   // FIXME: Identify instructions that aren't a single fused micro-op.
   let IssueWidth = 4;
   let MicroOpBufferSize = 168; // Based on the reorder buffer.
-  let LoadLatency = 4;
+  let LoadLatency = 5;
   let MispredictPenalty = 16;
 
   // Based on the LSD (loop-stream detector) queue size.
@@ -63,9 +63,9 @@ def SBDivider : ProcResource<1>;
 // FP division and sqrt on port 0.
 def SBFPDivider : ProcResource<1>;
 
-// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
 // cycles after the memory operand.
-def : ReadAdvance<ReadAfterLd, 4>;
+def : ReadAdvance<ReadAfterLd, 5>;
 
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
@@ -75,7 +75,7 @@ def : ReadAdvance<ReadAfterLd, 4>;
 multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
                           list<ProcResourceKind> ExePorts,
                           int Lat, list<int> Res = [1], int UOps = 1,
-                          int LoadLat = 4> {
+                          int LoadLat = 5> {
   // Register variant is using a single cycle on ExePort.
   def : WriteRes<SchedRW, ExePorts> {
     let Latency = Lat;
@@ -84,7 +84,7 @@ multiclass SBWriteResPair<X86FoldableSch
   }
 
   // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
-  // the latency (default = 4).
+  // the latency (default = 5).
   def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)> {
     let Latency = !add(Lat, LoadLat);
     let ResourceCycles = !listconcat([1], Res);
@@ -97,7 +97,7 @@ multiclass SBWriteResPair<X86FoldableSch
 def : WriteRes<WriteRMW, [SBPort4]>;
 
 def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
-def : WriteRes<WriteLoad,  [SBPort23]> { let Latency = 4; }
+def : WriteRes<WriteLoad,  [SBPort23]> { let Latency = 5; }
 def : WriteRes<WriteMove,  [SBPort015]>;
 def : WriteRes<WriteZero,  []>;
 

Modified: llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll Fri Apr  6 04:00:51 2018
@@ -15,7 +15,7 @@ define i64 @test_pavgusb(x86_mmx %a0, x8
 ; CHECK-LABEL: test_pavgusb:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pavgusb %mm1, %mm0 # sched: [5:1.00]
-; CHECK-NEXT:    pavgusb (%rdi), %mm0 # sched: [9:1.00]
+; CHECK-NEXT:    pavgusb (%rdi), %mm0 # sched: [10:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
@@ -29,7 +29,7 @@ declare x86_mmx @llvm.x86.3dnow.pavgusb(
 define i64 @test_pf2id(x86_mmx* %a0) optsize {
 ; CHECK-LABEL: test_pf2id:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pf2id (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pf2id (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    pf2id %mm0, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
@@ -44,7 +44,7 @@ declare x86_mmx @llvm.x86.3dnow.pf2id(x8
 define i64 @test_pf2iw(x86_mmx* %a0) optsize {
 ; CHECK-LABEL: test_pf2iw:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pf2iw (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pf2iw (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    pf2iw %mm0, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
@@ -60,7 +60,7 @@ define i64 @test_pfacc(x86_mmx %a0, x86_
 ; CHECK-LABEL: test_pfacc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfacc (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfacc (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1)
@@ -75,7 +75,7 @@ define i64 @test_pfadd(x86_mmx %a0, x86_
 ; CHECK-LABEL: test_pfadd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfadd %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfadd (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfadd (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1)
@@ -90,7 +90,7 @@ define i64 @test_pfcmpeq(x86_mmx %a0, x8
 ; CHECK-LABEL: test_pfcmpeq:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpeq %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfcmpeq (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfcmpeq (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1)
@@ -105,7 +105,7 @@ define i64 @test_pfcmpge(x86_mmx %a0, x8
 ; CHECK-LABEL: test_pfcmpge:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpge %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfcmpge (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfcmpge (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1)
@@ -120,7 +120,7 @@ define i64 @test_pfcmpgt(x86_mmx %a0, x8
 ; CHECK-LABEL: test_pfcmpgt:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpgt %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfcmpgt (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfcmpgt (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1)
@@ -135,7 +135,7 @@ define i64 @test_pfmax(x86_mmx %a0, x86_
 ; CHECK-LABEL: test_pfmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmax %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfmax (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfmax (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1)
@@ -150,7 +150,7 @@ define i64 @test_pfmin(x86_mmx %a0, x86_
 ; CHECK-LABEL: test_pfmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmin %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfmin (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfmin (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1)
@@ -165,7 +165,7 @@ define i64 @test_pfmul(x86_mmx %a0, x86_
 ; CHECK-LABEL: test_pfmul:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmul %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfmul (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfmul (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1)
@@ -180,7 +180,7 @@ define i64 @test_pfnacc(x86_mmx %a0, x86
 ; CHECK-LABEL: test_pfnacc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfnacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfnacc (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfnacc (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1)
@@ -195,7 +195,7 @@ define i64 @test_pfpnacc(x86_mmx %a0, x8
 ; CHECK-LABEL: test_pfpnacc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfpnacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfpnacc (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfpnacc (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1)
@@ -209,7 +209,7 @@ declare x86_mmx @llvm.x86.3dnowa.pfpnacc
 define i64 @test_pfrcp(x86_mmx* %a0) optsize {
 ; CHECK-LABEL: test_pfrcp:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pfrcp (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfrcp (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    pfrcp %mm0, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
@@ -225,7 +225,7 @@ define i64 @test_pfrcpit1(x86_mmx %a0, x
 ; CHECK-LABEL: test_pfrcpit1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrcpit1 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfrcpit1 (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfrcpit1 (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1)
@@ -240,7 +240,7 @@ define i64 @test_pfrcpit2(x86_mmx %a0, x
 ; CHECK-LABEL: test_pfrcpit2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrcpit2 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfrcpit2 (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfrcpit2 (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1)
@@ -255,7 +255,7 @@ define i64 @test_pfrsqit1(x86_mmx %a0, x
 ; CHECK-LABEL: test_pfrsqit1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrsqit1 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfrsqit1 (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfrsqit1 (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1)
@@ -269,7 +269,7 @@ declare x86_mmx @llvm.x86.3dnow.pfrsqit1
 define i64 @test_pfrsqrt(x86_mmx* %a0) optsize {
 ; CHECK-LABEL: test_pfrsqrt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pfrsqrt (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfrsqrt (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    pfrsqrt %mm0, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
@@ -285,7 +285,7 @@ define i64 @test_pfsub(x86_mmx %a0, x86_
 ; CHECK-LABEL: test_pfsub:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfsub %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfsub (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfsub (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1)
@@ -300,7 +300,7 @@ define i64 @test_pfsubr(x86_mmx %a0, x86
 ; CHECK-LABEL: test_pfsubr:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfsubr %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    pfsubr (%rdi), %mm0 # sched: [7:1.00]
+; CHECK-NEXT:    pfsubr (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1)
@@ -314,7 +314,7 @@ declare x86_mmx @llvm.x86.3dnow.pfsubr(x
 define i64 @test_pi2fd(x86_mmx* %a0) optsize {
 ; CHECK-LABEL: test_pi2fd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pi2fd (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT:    pi2fd (%rdi), %mm0 # sched: [9:1.00]
 ; CHECK-NEXT:    pi2fd %mm0, %mm0 # sched: [4:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
@@ -329,7 +329,7 @@ declare x86_mmx @llvm.x86.3dnow.pi2fd(x8
 define i64 @test_pi2fw(x86_mmx* %a0) optsize {
 ; CHECK-LABEL: test_pi2fw:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pi2fw (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT:    pi2fw (%rdi), %mm0 # sched: [9:1.00]
 ; CHECK-NEXT:    pi2fw %mm0, %mm0 # sched: [4:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
@@ -345,7 +345,7 @@ define i64 @test_pmulhrw(x86_mmx %a0, x8
 ; CHECK-LABEL: test_pmulhrw:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pmulhrw %mm1, %mm0 # sched: [5:1.00]
-; CHECK-NEXT:    pmulhrw (%rdi), %mm0 # sched: [9:1.00]
+; CHECK-NEXT:    pmulhrw (%rdi), %mm0 # sched: [10:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1)
@@ -381,7 +381,7 @@ define void @test_prefetchw(i8* %a0) opt
 define i64 @test_pswapd(x86_mmx* %a0) optsize {
 ; CHECK-LABEL: test_pswapd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [5:1.00]
+; CHECK-NEXT:    pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [6:1.00]
 ; CHECK-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]

Modified: llvm/trunk/test/CodeGen/X86/adx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/adx-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/adx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/adx-schedule.ll Fri Apr  6 04:00:51 2018
@@ -12,8 +12,8 @@ define void @test_adcx(i32 %a0, i32* %a1
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    adcxl %edi, %edi # sched: [1:0.33]
 ; GENERIC-NEXT:    adcxq %rdx, %rdx # sched: [1:0.33]
-; GENERIC-NEXT:    adcxl (%rsi), %edi # sched: [5:0.50]
-; GENERIC-NEXT:    adcxq (%rcx), %rdx # sched: [5:0.50]
+; GENERIC-NEXT:    adcxl (%rsi), %edi # sched: [6:0.50]
+; GENERIC-NEXT:    adcxq (%rcx), %rdx # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -65,8 +65,8 @@ define void @test_adox(i32 %a0, i32* %a1
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    adoxl %edi, %edi # sched: [1:0.33]
 ; GENERIC-NEXT:    adoxq %rdx, %rdx # sched: [1:0.33]
-; GENERIC-NEXT:    adoxl (%rsi), %edi # sched: [5:0.50]
-; GENERIC-NEXT:    adoxq (%rcx), %rdx # sched: [5:0.50]
+; GENERIC-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
+; GENERIC-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Fri Apr  6 04:00:51 2018
@@ -1607,13 +1607,13 @@ define <8 x float> @test_dpps(<8 x float
 ; GENERIC-LABEL: test_dpps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
-; GENERIC-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_dpps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
-; SANDY-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; SANDY-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_dpps:

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Fri Apr  6 04:00:51 2018
@@ -9,7 +9,7 @@
 define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
 ; GENERIC-LABEL: test_broadcasti128:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -236,7 +236,7 @@ define <4 x i32> @test_extracti128(<8 x
 define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
 ; GENERIC-LABEL: test_gatherdpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherdpd:
@@ -271,7 +271,7 @@ declare <2 x double> @llvm.x86.avx2.gath
 define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
 ; GENERIC-LABEL: test_gatherdpd_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherdpd_ymm:
@@ -306,7 +306,7 @@ declare <4 x double> @llvm.x86.avx2.gath
 define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
 ; GENERIC-LABEL: test_gatherdps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherdps:
@@ -341,7 +341,7 @@ declare <4 x float> @llvm.x86.avx2.gathe
 define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
 ; GENERIC-LABEL: test_gatherdps_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherdps_ymm:
@@ -376,7 +376,7 @@ declare <8 x float> @llvm.x86.avx2.gathe
 define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
 ; GENERIC-LABEL: test_gatherqpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherqpd:
@@ -411,7 +411,7 @@ declare <2 x double> @llvm.x86.avx2.gath
 define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
 ; GENERIC-LABEL: test_gatherqpd_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherqpd_ymm:
@@ -446,7 +446,7 @@ declare <4 x double> @llvm.x86.avx2.gath
 define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
 ; GENERIC-LABEL: test_gatherqps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherqps:
@@ -481,7 +481,7 @@ declare <4 x float> @llvm.x86.avx2.gathe
 define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
 ; GENERIC-LABEL: test_gatherqps_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -523,7 +523,7 @@ define <8 x i32> @test_inserti128(<8 x i
 ; GENERIC-LABEL: test_inserti128:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -609,7 +609,7 @@ define <16 x i16> @test_mpsadbw(<32 x i8
 ; GENERIC-LABEL: test_mpsadbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mpsadbw:
@@ -653,7 +653,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %
 ; GENERIC-LABEL: test_pabsb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpabsb %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpabsb (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vpabsb (%rdi), %ymm1 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -703,7 +703,7 @@ define <8 x i32> @test_pabsd(<8 x i32> %
 ; GENERIC-LABEL: test_pabsd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpabsd %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpabsd (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vpabsd (%rdi), %ymm1 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -753,7 +753,7 @@ define <16 x i16> @test_pabsw(<16 x i16>
 ; GENERIC-LABEL: test_pabsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpabsw %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpabsw (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vpabsw (%rdi), %ymm1 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -803,7 +803,7 @@ define <16 x i16> @test_packssdw(<8 x i3
 ; GENERIC-LABEL: test_packssdw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packssdw:
@@ -847,7 +847,7 @@ define <32 x i8> @test_packsswb(<16 x i1
 ; GENERIC-LABEL: test_packsswb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packsswb:
@@ -891,7 +891,7 @@ define <16 x i16> @test_packusdw(<8 x i3
 ; GENERIC-LABEL: test_packusdw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packusdw:
@@ -935,7 +935,7 @@ define <32 x i8> @test_packuswb(<16 x i1
 ; GENERIC-LABEL: test_packuswb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packuswb:
@@ -979,7 +979,7 @@ define <32 x i8> @test_paddb(<32 x i8> %
 ; GENERIC-LABEL: test_paddb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddb:
@@ -1021,7 +1021,7 @@ define <8 x i32> @test_paddd(<8 x i32> %
 ; GENERIC-LABEL: test_paddd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddd:
@@ -1063,7 +1063,7 @@ define <4 x i64> @test_paddq(<4 x i64> %
 ; GENERIC-LABEL: test_paddq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddq:
@@ -1105,7 +1105,7 @@ define <32 x i8> @test_paddsb(<32 x i8>
 ; GENERIC-LABEL: test_paddsb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddsb:
@@ -1148,7 +1148,7 @@ define <16 x i16> @test_paddsw(<16 x i16
 ; GENERIC-LABEL: test_paddsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddsw:
@@ -1191,7 +1191,7 @@ define <32 x i8> @test_paddusb(<32 x i8>
 ; GENERIC-LABEL: test_paddusb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddusb:
@@ -1234,7 +1234,7 @@ define <16 x i16> @test_paddusw(<16 x i1
 ; GENERIC-LABEL: test_paddusw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddusw:
@@ -1277,7 +1277,7 @@ define <16 x i16> @test_paddw(<16 x i16>
 ; GENERIC-LABEL: test_paddw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddw:
@@ -1368,7 +1368,7 @@ define <4 x i64> @test_pand(<4 x i64> %a
 ; GENERIC-LABEL: test_pand:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpand (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1417,7 +1417,7 @@ define <4 x i64> @test_pandn(<4 x i64> %
 ; GENERIC-LABEL: test_pandn:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpandn (%rdi), %ymm0, %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1468,7 +1468,7 @@ define <32 x i8> @test_pavgb(<32 x i8> %
 ; GENERIC-LABEL: test_pavgb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pavgb:
@@ -1520,7 +1520,7 @@ define <16 x i16> @test_pavgw(<16 x i16>
 ; GENERIC-LABEL: test_pavgw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pavgw:
@@ -1572,7 +1572,7 @@ define <4 x i32> @test_pblendd(<4 x i32>
 ; GENERIC-LABEL: test_pblendd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
-; GENERIC-NEXT:    vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [5:0.50]
+; GENERIC-NEXT:    vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50]
 ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1621,7 +1621,7 @@ define <8 x i32> @test_pblendd_ymm(<8 x
 ; GENERIC-LABEL: test_pblendd_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
-; GENERIC-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [5:0.50]
+; GENERIC-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [6:0.50]
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1670,7 +1670,7 @@ define <32 x i8> @test_pblendvb(<32 x i8
 ; GENERIC-LABEL: test_pblendvb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; GENERIC-NEXT:    vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pblendvb:
@@ -1713,7 +1713,7 @@ define <16 x i16> @test_pblendw(<16 x i1
 ; GENERIC-LABEL: test_pblendw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
-; GENERIC-NEXT:    vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [5:0.50]
+; GENERIC-NEXT:    vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [6:0.50]
 ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1762,7 +1762,7 @@ define <16 x i8> @test_pbroadcastb(<16 x
 ; GENERIC-LABEL: test_pbroadcastb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
 ; GENERIC-LABEL: test_pbroadcastb_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1860,7 +1860,7 @@ define <4 x i32> @test_pbroadcastd(<4 x
 ; GENERIC-LABEL: test_pbroadcastd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1909,7 +1909,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
 ; GENERIC-LABEL: test_pbroadcastd_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1958,7 +1958,7 @@ define <2 x i64> @test_pbroadcastq(<2 x
 ; GENERIC-LABEL: test_pbroadcastq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2007,7 +2007,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
 ; GENERIC-LABEL: test_pbroadcastq_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2056,7 +2056,7 @@ define <8 x i16> @test_pbroadcastw(<8 x
 ; GENERIC-LABEL: test_pbroadcastw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2105,7 +2105,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
 ; GENERIC-LABEL: test_pbroadcastw_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2154,7 +2154,7 @@ define <32 x i8> @test_pcmpeqb(<32 x i8>
 ; GENERIC-LABEL: test_pcmpeqb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqb:
@@ -2198,7 +2198,7 @@ define <8 x i32> @test_pcmpeqd(<8 x i32>
 ; GENERIC-LABEL: test_pcmpeqd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqd:
@@ -2242,7 +2242,7 @@ define <4 x i64> @test_pcmpeqq(<4 x i64>
 ; GENERIC-LABEL: test_pcmpeqq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqq:
@@ -2286,7 +2286,7 @@ define <16 x i16> @test_pcmpeqw(<16 x i1
 ; GENERIC-LABEL: test_pcmpeqw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqw:
@@ -2330,7 +2330,7 @@ define <32 x i8> @test_pcmpgtb(<32 x i8>
 ; GENERIC-LABEL: test_pcmpgtb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtb:
@@ -2374,7 +2374,7 @@ define <8 x i32> @test_pcmpgtd(<8 x i32>
 ; GENERIC-LABEL: test_pcmpgtd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtd:
@@ -2418,7 +2418,7 @@ define <4 x i64> @test_pcmpgtq(<4 x i64>
 ; GENERIC-LABEL: test_pcmpgtq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtq:
@@ -2462,7 +2462,7 @@ define <16 x i16> @test_pcmpgtw(<16 x i1
 ; GENERIC-LABEL: test_pcmpgtw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtw:
@@ -2506,7 +2506,7 @@ define <4 x i64> @test_perm2i128(<4 x i6
 ; GENERIC-LABEL: test_perm2i128:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2555,7 +2555,7 @@ define <8 x i32> @test_permd(<8 x i32> %
 ; GENERIC-LABEL: test_permd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2605,7 +2605,7 @@ define <4 x double> @test_permpd(<4 x do
 ; GENERIC-LABEL: test_permpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2654,7 +2654,7 @@ define <8 x float> @test_permps(<8 x i32
 ; GENERIC-LABEL: test_permps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2704,7 +2704,7 @@ define <4 x i64> @test_permq(<4 x i64> %
 ; GENERIC-LABEL: test_permq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2752,7 +2752,7 @@ define <4 x i64> @test_permq(<4 x i64> %
 define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
 ; GENERIC-LABEL: test_pgatherdd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherdd:
@@ -2787,7 +2787,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
 define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
 ; GENERIC-LABEL: test_pgatherdd_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherdd_ymm:
@@ -2822,7 +2822,7 @@ declare <8 x i32> @llvm.x86.avx2.gather.
 define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
 ; GENERIC-LABEL: test_pgatherdq:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherdq:
@@ -2857,7 +2857,7 @@ declare <2 x i64> @llvm.x86.avx2.gather.
 define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
 ; GENERIC-LABEL: test_pgatherdq_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherdq_ymm:
@@ -2892,7 +2892,7 @@ declare <4 x i64> @llvm.x86.avx2.gather.
 define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
 ; GENERIC-LABEL: test_pgatherqd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherqd:
@@ -2927,7 +2927,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
 define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
 ; GENERIC-LABEL: test_pgatherqd_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2968,7 +2968,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
 define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
 ; GENERIC-LABEL: test_pgatherqq:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherqq:
@@ -3003,7 +3003,7 @@ declare <2 x i64> @llvm.x86.avx2.gather.
 define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
 ; GENERIC-LABEL: test_pgatherqq_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
+; GENERIC-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherqq_ymm:
@@ -3039,7 +3039,7 @@ define <8 x i32> @test_phaddd(<8 x i32>
 ; GENERIC-LABEL: test_phaddd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddd (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddd:
@@ -3082,7 +3082,7 @@ define <16 x i16> @test_phaddsw(<16 x i1
 ; GENERIC-LABEL: test_phaddsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vphaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vphaddsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddsw:
@@ -3125,7 +3125,7 @@ define <16 x i16> @test_phaddw(<16 x i16
 ; GENERIC-LABEL: test_phaddw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vphaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddw (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddw:
@@ -3168,7 +3168,7 @@ define <8 x i32> @test_phsubd(<8 x i32>
 ; GENERIC-LABEL: test_phsubd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vphsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphsubd (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubd:
@@ -3211,7 +3211,7 @@ define <16 x i16> @test_phsubsw(<16 x i1
 ; GENERIC-LABEL: test_phsubsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vphsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vphsubsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubsw:
@@ -3254,7 +3254,7 @@ define <16 x i16> @test_phsubw(<16 x i16
 ; GENERIC-LABEL: test_phsubw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vphsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphsubw (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubw:
@@ -3297,7 +3297,7 @@ define <16 x i16> @test_pmaddubsw(<32 x
 ; GENERIC-LABEL: test_pmaddubsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddubsw:
@@ -3341,7 +3341,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16
 ; GENERIC-LABEL: test_pmaddwd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddwd:
@@ -3581,7 +3581,7 @@ define <32 x i8> @test_pmaxsb(<32 x i8>
 ; GENERIC-LABEL: test_pmaxsb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxsb:
@@ -3624,7 +3624,7 @@ define <8 x i32> @test_pmaxsd(<8 x i32>
 ; GENERIC-LABEL: test_pmaxsd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxsd:
@@ -3667,7 +3667,7 @@ define <16 x i16> @test_pmaxsw(<16 x i16
 ; GENERIC-LABEL: test_pmaxsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxsw:
@@ -3710,7 +3710,7 @@ define <32 x i8> @test_pmaxub(<32 x i8>
 ; GENERIC-LABEL: test_pmaxub:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxub:
@@ -3753,7 +3753,7 @@ define <8 x i32> @test_pmaxud(<8 x i32>
 ; GENERIC-LABEL: test_pmaxud:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxud:
@@ -3796,7 +3796,7 @@ define <16 x i16> @test_pmaxuw(<16 x i16
 ; GENERIC-LABEL: test_pmaxuw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxuw:
@@ -3839,7 +3839,7 @@ define <32 x i8> @test_pminsb(<32 x i8>
 ; GENERIC-LABEL: test_pminsb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminsb:
@@ -3882,7 +3882,7 @@ define <8 x i32> @test_pminsd(<8 x i32>
 ; GENERIC-LABEL: test_pminsd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminsd:
@@ -3925,7 +3925,7 @@ define <16 x i16> @test_pminsw(<16 x i16
 ; GENERIC-LABEL: test_pminsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminsw:
@@ -3968,7 +3968,7 @@ define <32 x i8> @test_pminub(<32 x i8>
 ; GENERIC-LABEL: test_pminub:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpminub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpminub (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminub:
@@ -4011,7 +4011,7 @@ define <8 x i32> @test_pminud(<8 x i32>
 ; GENERIC-LABEL: test_pminud:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpminud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpminud (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminud:
@@ -4054,7 +4054,7 @@ define <16 x i16> @test_pminuw(<16 x i16
 ; GENERIC-LABEL: test_pminuw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminuw:
@@ -4138,7 +4138,7 @@ define <8 x i32> @test_pmovsxbd(<16 x i8
 ; GENERIC-LABEL: test_pmovsxbd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovsxbd (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbd (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4189,7 +4189,7 @@ define <4 x i64> @test_pmovsxbq(<16 x i8
 ; GENERIC-LABEL: test_pmovsxbq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4240,7 +4240,7 @@ define <16 x i16> @test_pmovsxbw(<16 x i
 ; GENERIC-LABEL: test_pmovsxbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovsxbw (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbw (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4289,7 +4289,7 @@ define <4 x i64> @test_pmovsxdq(<4 x i32
 ; GENERIC-LABEL: test_pmovsxdq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4338,7 +4338,7 @@ define <8 x i32> @test_pmovsxwd(<8 x i16
 ; GENERIC-LABEL: test_pmovsxwd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4387,7 +4387,7 @@ define <4 x i64> @test_pmovsxwq(<8 x i16
 ; GENERIC-LABEL: test_pmovsxwq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm1 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm1 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4438,7 +4438,7 @@ define <8 x i32> @test_pmovzxbd(<16 x i8
 ; GENERIC-LABEL: test_pmovzxbd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4489,7 +4489,7 @@ define <4 x i64> @test_pmovzxbq(<16 x i8
 ; GENERIC-LABEL: test_pmovzxbq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4540,7 +4540,7 @@ define <16 x i16> @test_pmovzxbw(<16 x i
 ; GENERIC-LABEL: test_pmovzxbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4589,7 +4589,7 @@ define <4 x i64> @test_pmovzxdq(<4 x i32
 ; GENERIC-LABEL: test_pmovzxdq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4638,7 +4638,7 @@ define <8 x i32> @test_pmovzxwd(<8 x i16
 ; GENERIC-LABEL: test_pmovzxwd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4687,7 +4687,7 @@ define <4 x i64> @test_pmovzxwq(<8 x i16
 ; GENERIC-LABEL: test_pmovzxwq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4738,7 +4738,7 @@ define <4 x i64> @test_pmuldq(<8 x i32>
 ; GENERIC-LABEL: test_pmuldq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmuldq:
@@ -4782,7 +4782,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i
 ; GENERIC-LABEL: test_pmulhrsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhrsw:
@@ -4825,7 +4825,7 @@ define <16 x i16> @test_pmulhuw(<16 x i1
 ; GENERIC-LABEL: test_pmulhuw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhuw:
@@ -4868,7 +4868,7 @@ define <16 x i16> @test_pmulhw(<16 x i16
 ; GENERIC-LABEL: test_pmulhw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhw:
@@ -4953,7 +4953,7 @@ define <16 x i16> @test_pmullw(<16 x i16
 ; GENERIC-LABEL: test_pmullw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmullw:
@@ -4995,7 +4995,7 @@ define <4 x i64> @test_pmuludq(<8 x i32>
 ; GENERIC-LABEL: test_pmuludq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmuludq:
@@ -5039,7 +5039,7 @@ define <4 x i64> @test_por(<4 x i64> %a0
 ; GENERIC-LABEL: test_por:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5088,7 +5088,7 @@ define <4 x i64> @test_psadbw(<32 x i8>
 ; GENERIC-LABEL: test_psadbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psadbw:
@@ -5132,7 +5132,7 @@ define <32 x i8> @test_pshufb(<32 x i8>
 ; GENERIC-LABEL: test_pshufb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufb:
@@ -5175,7 +5175,7 @@ define <8 x i32> @test_pshufd(<8 x i32>
 ; GENERIC-LABEL: test_pshufd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5224,7 +5224,7 @@ define <16 x i16> @test_pshufhw(<16 x i1
 ; GENERIC-LABEL: test_pshufhw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [6:1.00]
 ; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5273,7 +5273,7 @@ define <16 x i16> @test_pshuflw(<16 x i1
 ; GENERIC-LABEL: test_pshuflw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5322,7 +5322,7 @@ define <32 x i8> @test_psignb(<32 x i8>
 ; GENERIC-LABEL: test_psignb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignb:
@@ -5365,7 +5365,7 @@ define <8 x i32> @test_psignd(<8 x i32>
 ; GENERIC-LABEL: test_psignd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignd:
@@ -5408,7 +5408,7 @@ define <16 x i16> @test_psignw(<16 x i16
 ; GENERIC-LABEL: test_psignw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignw:
@@ -5451,7 +5451,7 @@ define <8 x i32> @test_pslld(<8 x i32> %
 ; GENERIC-LABEL: test_pslld:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpslld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpslld (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5535,7 +5535,7 @@ define <4 x i64> @test_psllq(<4 x i64> %
 ; GENERIC-LABEL: test_psllq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsllq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsllq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5585,7 +5585,7 @@ define <4 x i32> @test_psllvd(<4 x i32>
 ; GENERIC-LABEL: test_psllvd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsllvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsllvd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllvd:
@@ -5628,7 +5628,7 @@ define <8 x i32> @test_psllvd_ymm(<8 x i
 ; GENERIC-LABEL: test_psllvd_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsllvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsllvd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllvd_ymm:
@@ -5671,7 +5671,7 @@ define <2 x i64> @test_psllvq(<2 x i64>
 ; GENERIC-LABEL: test_psllvq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsllvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllvq:
@@ -5714,7 +5714,7 @@ define <4 x i64> @test_psllvq_ymm(<4 x i
 ; GENERIC-LABEL: test_psllvq_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsllvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsllvq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllvq_ymm:
@@ -5757,7 +5757,7 @@ define <16 x i16> @test_psllw(<16 x i16>
 ; GENERIC-LABEL: test_psllw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsllw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsllw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5807,7 +5807,7 @@ define <8 x i32> @test_psrad(<8 x i32> %
 ; GENERIC-LABEL: test_psrad:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsrad (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsrad (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5857,7 +5857,7 @@ define <4 x i32> @test_psravd(<4 x i32>
 ; GENERIC-LABEL: test_psravd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsravd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsravd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psravd:
@@ -5900,7 +5900,7 @@ define <8 x i32> @test_psravd_ymm(<8 x i
 ; GENERIC-LABEL: test_psravd_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsravd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsravd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psravd_ymm:
@@ -5943,7 +5943,7 @@ define <16 x i16> @test_psraw(<16 x i16>
 ; GENERIC-LABEL: test_psraw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsraw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsraw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5993,7 +5993,7 @@ define <8 x i32> @test_psrld(<8 x i32> %
 ; GENERIC-LABEL: test_psrld:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsrld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsrld (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6077,7 +6077,7 @@ define <4 x i64> @test_psrlq(<4 x i64> %
 ; GENERIC-LABEL: test_psrlq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsrlq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsrlq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6127,7 +6127,7 @@ define <4 x i32> @test_psrlvd(<4 x i32>
 ; GENERIC-LABEL: test_psrlvd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlvd:
@@ -6170,7 +6170,7 @@ define <8 x i32> @test_psrlvd_ymm(<8 x i
 ; GENERIC-LABEL: test_psrlvd_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlvd_ymm:
@@ -6213,7 +6213,7 @@ define <2 x i64> @test_psrlvq(<2 x i64>
 ; GENERIC-LABEL: test_psrlvq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlvq:
@@ -6256,7 +6256,7 @@ define <4 x i64> @test_psrlvq_ymm(<4 x i
 ; GENERIC-LABEL: test_psrlvq_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlvq_ymm:
@@ -6299,7 +6299,7 @@ define <16 x i16> @test_psrlw(<16 x i16>
 ; GENERIC-LABEL: test_psrlw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpsrlw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpsrlw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6349,7 +6349,7 @@ define <32 x i8> @test_psubb(<32 x i8> %
 ; GENERIC-LABEL: test_psubb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubb:
@@ -6391,7 +6391,7 @@ define <8 x i32> @test_psubd(<8 x i32> %
 ; GENERIC-LABEL: test_psubd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubd:
@@ -6433,7 +6433,7 @@ define <4 x i64> @test_psubq(<4 x i64> %
 ; GENERIC-LABEL: test_psubq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubq:
@@ -6475,7 +6475,7 @@ define <32 x i8> @test_psubsb(<32 x i8>
 ; GENERIC-LABEL: test_psubsb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubsb:
@@ -6518,7 +6518,7 @@ define <16 x i16> @test_psubsw(<16 x i16
 ; GENERIC-LABEL: test_psubsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubsw:
@@ -6561,7 +6561,7 @@ define <32 x i8> @test_psubusb(<32 x i8>
 ; GENERIC-LABEL: test_psubusb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubusb:
@@ -6604,7 +6604,7 @@ define <16 x i16> @test_psubusw(<16 x i1
 ; GENERIC-LABEL: test_psubusw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubusw:
@@ -6647,7 +6647,7 @@ define <16 x i16> @test_psubw(<16 x i16>
 ; GENERIC-LABEL: test_psubw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubw:
@@ -6689,7 +6689,7 @@ define <32 x i8> @test_punpckhbw(<32 x i
 ; GENERIC-LABEL: test_punpckhbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
-; GENERIC-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [5:1.00]
+; GENERIC-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhbw:
@@ -6731,7 +6731,7 @@ define <8 x i32> @test_punpckhdq(<8 x i3
 ; GENERIC-LABEL: test_punpckhdq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
+; GENERIC-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6786,7 +6786,7 @@ define <4 x i64> @test_punpckhqdq(<4 x i
 ; GENERIC-LABEL: test_punpckhqdq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [5:1.00]
+; GENERIC-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6835,7 +6835,7 @@ define <16 x i16> @test_punpckhwd(<16 x
 ; GENERIC-LABEL: test_punpckhwd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
-; GENERIC-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [5:1.00]
+; GENERIC-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhwd:
@@ -6877,7 +6877,7 @@ define <32 x i8> @test_punpcklbw(<32 x i
 ; GENERIC-LABEL: test_punpcklbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
-; GENERIC-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [5:1.00]
+; GENERIC-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklbw:
@@ -6919,7 +6919,7 @@ define <8 x i32> @test_punpckldq(<8 x i3
 ; GENERIC-LABEL: test_punpckldq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
+; GENERIC-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6974,7 +6974,7 @@ define <4 x i64> @test_punpcklqdq(<4 x i
 ; GENERIC-LABEL: test_punpcklqdq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [5:1.00]
+; GENERIC-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7023,7 +7023,7 @@ define <16 x i16> @test_punpcklwd(<16 x
 ; GENERIC-LABEL: test_punpcklwd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
-; GENERIC-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [5:1.00]
+; GENERIC-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklwd:
@@ -7065,7 +7065,7 @@ define <4 x i64> @test_pxor(<4 x i64> %a
 ; GENERIC-LABEL: test_pxor:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Fri Apr  6 04:00:51 2018
@@ -22,7 +22,7 @@ entry:
 define <8 x double> @addpd512fold(<8 x double> %y) {
 ; GENERIC-LABEL: addpd512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: addpd512fold:
@@ -52,7 +52,7 @@ entry:
 define <16 x float> @addps512fold(<16 x float> %y) {
 ; GENERIC-LABEL: addps512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: addps512fold:
@@ -82,7 +82,7 @@ entry:
 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
 ; GENERIC-LABEL: subpd512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vsubpd (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vsubpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: subpd512fold:
@@ -113,7 +113,7 @@ entry:
 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
 ; GENERIC-LABEL: subps512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vsubps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vsubps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: subps512fold:
@@ -186,7 +186,7 @@ entry:
 define <8 x double> @mulpd512fold(<8 x double> %y) {
 ; GENERIC-LABEL: mulpd512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mulpd512fold:
@@ -216,7 +216,7 @@ entry:
 define <16 x float> @mulps512fold(<16 x float> %y) {
 ; GENERIC-LABEL: mulps512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mulps512fold:
@@ -246,7 +246,7 @@ entry:
 define <8 x double> @divpd512fold(<8 x double> %y) {
 ; GENERIC-LABEL: divpd512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [28:1.00]
+; GENERIC-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [29:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: divpd512fold:
@@ -276,7 +276,7 @@ entry:
 define <16 x float> @divps512fold(<16 x float> %y) {
 ; GENERIC-LABEL: divps512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [28:1.00]
+; GENERIC-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [29:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: divps512fold:
@@ -305,7 +305,7 @@ define <8 x i64> @vpaddq_test(<8 x i64>
 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
 ; GENERIC-LABEL: vpaddq_fold_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddq_fold_test:
@@ -320,7 +320,7 @@ define <8 x i64> @vpaddq_fold_test(<8 x
 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
 ; GENERIC-LABEL: vpaddq_broadcast_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddq_broadcast_test:
@@ -334,7 +334,7 @@ define <8 x i64> @vpaddq_broadcast_test(
 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
 ; GENERIC-LABEL: vpaddq_broadcast2_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddq_broadcast2_test:
@@ -371,7 +371,7 @@ define <16 x i32> @vpaddd_test(<16 x i32
 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
 ; GENERIC-LABEL: vpaddd_fold_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddd_fold_test:
@@ -386,7 +386,7 @@ define <16 x i32> @vpaddd_fold_test(<16
 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
 ; GENERIC-LABEL: vpaddd_broadcast_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddd_broadcast_test:
@@ -437,7 +437,7 @@ define <16 x i32> @vpaddd_mask_fold_test
 ; GENERIC-LABEL: vpaddd_mask_fold_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddd_mask_fold_test:
@@ -456,7 +456,7 @@ define <16 x i32> @vpaddd_mask_broadcast
 ; GENERIC-LABEL: vpaddd_mask_broadcast_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddd_mask_broadcast_test:
@@ -474,7 +474,7 @@ define <16 x i32> @vpaddd_maskz_fold_tes
 ; GENERIC-LABEL: vpaddd_maskz_fold_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddd_maskz_fold_test:
@@ -493,7 +493,7 @@ define <16 x i32> @vpaddd_maskz_broadcas
 ; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpaddd_maskz_broadcast_test:
@@ -629,7 +629,7 @@ define <8 x double> @sqrtE(<8 x double>
 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
 ; GENERIC-LABEL: fadd_broadcast:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: fadd_broadcast:
@@ -643,7 +643,7 @@ define <16 x float> @fadd_broadcast(<16
 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
 ; GENERIC-LABEL: addq_broadcast:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: addq_broadcast:
@@ -657,7 +657,7 @@ define <8 x i64> @addq_broadcast(<8 x i6
 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
 ; GENERIC-LABEL: orq_broadcast:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: orq_broadcast:
@@ -671,7 +671,7 @@ define <8 x i64> @orq_broadcast(<8 x i64
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
 ; GENERIC-LABEL: andd512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: andd512fold:
@@ -687,7 +687,7 @@ entry:
 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
 ; GENERIC-LABEL: andqbrst:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: andqbrst:
@@ -892,7 +892,7 @@ define <8 x double> @test_mask_fold_vadd
 ; GENERIC-LABEL: test_mask_fold_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [7:1.00]
+; GENERIC-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mask_fold_vaddpd:
@@ -911,7 +911,7 @@ define <8 x double> @test_maskz_fold_vad
 ; GENERIC-LABEL: test_maskz_fold_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
+; GENERIC-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_maskz_fold_vaddpd:
@@ -929,7 +929,7 @@ define <8 x double> @test_maskz_fold_vad
 define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
 ; GENERIC-LABEL: test_broadcast_vaddpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_broadcast_vaddpd:
@@ -948,7 +948,7 @@ define <8 x double> @test_mask_broadcast
 ; GENERIC-LABEL: test_mask_broadcast_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [7:1.00]
+; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -972,7 +972,7 @@ define <8 x double> @test_maskz_broadcas
 ; GENERIC-LABEL: test_maskz_broadcast_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
+; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_maskz_broadcast_vaddpd:
@@ -994,7 +994,7 @@ define <8 x double> @test_maskz_broadcas
 define <16 x float>  @test_fxor(<16 x float> %a) {
 ; GENERIC-LABEL: test_fxor:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_fxor:
@@ -1009,7 +1009,7 @@ define <16 x float>  @test_fxor(<16 x fl
 define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
 ; GENERIC-LABEL: test_fxor_8f32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_fxor_8f32:
@@ -1023,7 +1023,7 @@ define <8 x float>  @test_fxor_8f32(<8 x
 define <8 x double> @fabs_v8f64(<8 x double> %p)
 ; GENERIC-LABEL: fabs_v8f64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: fabs_v8f64:
@@ -1039,7 +1039,7 @@ declare <8 x double> @llvm.fabs.v8f64(<8
 define <16 x float> @fabs_v16f32(<16 x float> %p)
 ; GENERIC-LABEL: fabs_v16f32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: fabs_v16f32:
@@ -1403,7 +1403,7 @@ define <2 x float> @sltof2f32(<2 x i64>
 define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
 ; GENERIC-LABEL: slto4f32_mem:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vcvtqq2psy (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vcvtqq2psy (%rdi), %xmm0 # sched: [9:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: slto4f32_mem:
@@ -2829,7 +2829,7 @@ define <8 x float> @ubto8f32(<8 x i32> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto8f32:
@@ -2869,7 +2869,7 @@ define <4 x float> @ubto4f32(<4 x i32> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto4f32:
@@ -2957,7 +2957,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_8x8mem_to_8x16:
@@ -2977,7 +2977,7 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x8mem_to_8x16:
@@ -2998,7 +2998,7 @@ define <16 x i16> @zext_16x8mem_to_16x16
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_16x8mem_to_16x16:
@@ -3018,7 +3018,7 @@ define <16 x i16> @sext_16x8mem_to_16x16
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_16x8mem_to_16x16:
@@ -3104,7 +3104,7 @@ define <32 x i16> @zext_32x8mem_to_32x16
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_32x8mem_to_32x16:
@@ -3124,7 +3124,7 @@ define <32 x i16> @sext_32x8mem_to_32x16
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_32x8mem_to_32x16:
@@ -3210,7 +3210,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x8mem_to_4x32:
@@ -3230,7 +3230,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x8mem_to_4x32:
@@ -3250,7 +3250,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_8x8mem_to_8x32:
@@ -3270,7 +3270,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x8mem_to_8x32:
@@ -3290,7 +3290,7 @@ define <16 x i32> @zext_16x8mem_to_16x32
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_16x8mem_to_16x32:
@@ -3310,7 +3310,7 @@ define <16 x i32> @sext_16x8mem_to_16x32
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_16x8mem_to_16x32:
@@ -3396,7 +3396,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_2x8mem_to_2x64:
@@ -3415,7 +3415,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mas
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
@@ -3449,7 +3449,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x8mem_to_4x64:
@@ -3469,7 +3469,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mas
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
@@ -3487,7 +3487,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mas
 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
 ; GENERIC-LABEL: sext_4x8mem_to_4x64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x8mem_to_4x64:
@@ -3504,7 +3504,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_8x8mem_to_8x64:
@@ -3524,7 +3524,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mas
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
@@ -3542,7 +3542,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mas
 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
 ; GENERIC-LABEL: sext_8x8mem_to_8x64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x8mem_to_8x64:
@@ -3559,7 +3559,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x16mem_to_4x32:
@@ -3579,7 +3579,7 @@ define <4 x i32> @sext_4x16mem_to_4x32ma
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
@@ -3615,7 +3615,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_8x16mem_to_8x32:
@@ -3635,7 +3635,7 @@ define <8 x i32> @sext_8x16mem_to_8x32ma
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
@@ -3653,7 +3653,7 @@ define <8 x i32> @sext_8x16mem_to_8x32ma
 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
 ; GENERIC-LABEL: sext_8x16mem_to_8x32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x16mem_to_8x32:
@@ -3703,7 +3703,7 @@ define <16 x i32> @zext_16x16mem_to_16x3
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_16x16mem_to_16x32:
@@ -3723,7 +3723,7 @@ define <16 x i32> @sext_16x16mem_to_16x3
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
@@ -3741,7 +3741,7 @@ define <16 x i32> @sext_16x16mem_to_16x3
 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
 ; GENERIC-LABEL: sext_16x16mem_to_16x32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_16x16mem_to_16x32:
@@ -3790,7 +3790,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_2x16mem_to_2x64:
@@ -3810,7 +3810,7 @@ define <2 x i64> @sext_2x16mem_to_2x64ma
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_2x16mem_to_2x64mask:
@@ -3845,7 +3845,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x16mem_to_4x64:
@@ -3865,7 +3865,7 @@ define <4 x i64> @sext_4x16mem_to_4x64ma
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x16mem_to_4x64mask:
@@ -3883,7 +3883,7 @@ define <4 x i64> @sext_4x16mem_to_4x64ma
 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
 ; GENERIC-LABEL: sext_4x16mem_to_4x64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x16mem_to_4x64:
@@ -3900,7 +3900,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_8x16mem_to_8x64:
@@ -3920,7 +3920,7 @@ define <8 x i64> @sext_8x16mem_to_8x64ma
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
@@ -3938,7 +3938,7 @@ define <8 x i64> @sext_8x16mem_to_8x64ma
 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
 ; GENERIC-LABEL: sext_8x16mem_to_8x64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x16mem_to_8x64:
@@ -3988,7 +3988,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_2x32mem_to_2x64:
@@ -4008,7 +4008,7 @@ define <2 x i64> @sext_2x32mem_to_2x64ma
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_2x32mem_to_2x64mask:
@@ -4043,7 +4043,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x32mem_to_4x64:
@@ -4063,7 +4063,7 @@ define <4 x i64> @sext_4x32mem_to_4x64ma
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x32mem_to_4x64mask:
@@ -4081,7 +4081,7 @@ define <4 x i64> @sext_4x32mem_to_4x64ma
 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
 ; GENERIC-LABEL: sext_4x32mem_to_4x64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x32mem_to_4x64:
@@ -4131,7 +4131,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_8x32mem_to_8x64:
@@ -4151,7 +4151,7 @@ define <8 x i64> @sext_8x32mem_to_8x64ma
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
@@ -4169,7 +4169,7 @@ define <8 x i64> @sext_8x32mem_to_8x64ma
 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
 ; GENERIC-LABEL: sext_8x32mem_to_8x64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8x32mem_to_8x64:
@@ -4473,7 +4473,7 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32
 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
 ; GENERIC-LABEL: extload_v8i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4809,7 +4809,7 @@ define <16 x float> @test_x86_fnmsub_ps_
 ; GENERIC-LABEL: test_x86_fnmsub_ps_z:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4918,7 +4918,7 @@ define double @test_x86_fmsub_231_m(doub
 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
 ; GENERIC-LABEL: test231_br:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4936,7 +4936,7 @@ define <16 x float> @test213_br(<16 x fl
 ; GENERIC-LABEL: test213_br:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test213_br:
@@ -4955,7 +4955,7 @@ define <16 x float> @test_x86_fmadd132_p
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm2 # sched: [9:1.00]
+; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm2 # sched: [10:1.00]
 ; GENERIC-NEXT:    vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4979,7 +4979,7 @@ define <16 x float> @test_x86_fmadd231_p
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5006,7 +5006,7 @@ define <16 x float> @test_x86_fmadd213_p
 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [7:1.00]
+; GENERIC-NEXT:    vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5028,7 +5028,7 @@ define <16 x float> @test_x86_fmadd213_p
 define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: vpandd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5048,7 +5048,7 @@ entry:
 define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: vpandnd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5070,7 +5070,7 @@ entry:
 define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: vpord:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5090,7 +5090,7 @@ entry:
 define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: vpxord:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5110,7 +5110,7 @@ entry:
 define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: vpandq:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5129,7 +5129,7 @@ entry:
 define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: vpandnq:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5149,7 +5149,7 @@ entry:
 define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: vporq:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5168,7 +5168,7 @@ entry:
 define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: vpxorq:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6743,7 +6743,7 @@ define i32 @mask8_zext(i8 %x) {
 define void @mask16_mem(i16* %ptr) {
 ; GENERIC-LABEL: mask16_mem:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6765,7 +6765,7 @@ define void @mask16_mem(i16* %ptr) {
 define void @mask8_mem(i8* %ptr) {
 ; GENERIC-LABEL: mask8_mem:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6814,8 +6814,8 @@ define i16 @mand16(i16 %x, i16 %y) {
 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
 ; GENERIC-LABEL: mand16_mem:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [4:0.50]
-; GENERIC-NEXT:    kmovw (%rsi), %k1 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
+; GENERIC-NEXT:    kmovw (%rsi), %k1 # sched: [5:0.50]
 ; GENERIC-NEXT:    kandw %k1, %k0, %k2 # sched: [1:1.00]
 ; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    korw %k0, %k2, %k0 # sched: [1:1.00]
@@ -7589,7 +7589,7 @@ define <32 x i16> @test_build_vec_v32i1(
 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
 ; GENERIC-LABEL: test_build_vec_v64i1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_build_vec_v64i1:
@@ -7742,7 +7742,7 @@ End:
 define <8 x i64> @load_8i1(<8 x i1>* %a) {
 ; GENERIC-LABEL: load_8i1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7759,7 +7759,7 @@ define <8 x i64> @load_8i1(<8 x i1>* %a)
 define <16 x i32> @load_16i1(<16 x i1>* %a) {
 ; GENERIC-LABEL: load_16i1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7776,7 +7776,7 @@ define <16 x i32> @load_16i1(<16 x i1>*
 define <2 x i16> @load_2i1(<2 x i1>* %a) {
 ; GENERIC-LABEL: load_2i1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7793,7 +7793,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a)
 define <4 x i16> @load_4i1(<4 x i1>* %a) {
 ; GENERIC-LABEL: load_4i1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7810,7 +7810,7 @@ define <4 x i16> @load_4i1(<4 x i1>* %a)
 define <32 x i16> @load_32i1(<32 x i1>* %a) {
 ; GENERIC-LABEL: load_32i1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd (%rdi), %k0 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovd (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7827,7 +7827,7 @@ define <32 x i16> @load_32i1(<32 x i1>*
 define <64 x i8> @load_64i1(<64 x i1>* %a) {
 ; GENERIC-LABEL: load_64i1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovq (%rdi), %k0 # sched: [4:0.50]
+; GENERIC-NEXT:    kmovq (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8258,7 +8258,7 @@ define   <16 x float> @_ss16xfloat_maskz
 define   <16 x float> @_ss16xfloat_load(float* %a.ptr) {
 ; GENERIC-LABEL: _ss16xfloat_load:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: _ss16xfloat_load:
@@ -8275,7 +8275,7 @@ define   <16 x float> @_ss16xfloat_mask_
 ; GENERIC-LABEL: _ss16xfloat_mask_load:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: _ss16xfloat_mask_load:
@@ -8295,7 +8295,7 @@ define   <16 x float> @_ss16xfloat_maskz
 ; GENERIC-LABEL: _ss16xfloat_maskz_load:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: _ss16xfloat_maskz_load:
@@ -8369,7 +8369,7 @@ define   <8 x double> @_sd8xdouble_maskz
 define   <8 x double> @_sd8xdouble_load(double* %a.ptr) {
 ; GENERIC-LABEL: _sd8xdouble_load:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: _sd8xdouble_load:
@@ -8386,7 +8386,7 @@ define   <8 x double> @_sd8xdouble_mask_
 ; GENERIC-LABEL: _sd8xdouble_mask_load:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: _sd8xdouble_mask_load:
@@ -8406,7 +8406,7 @@ define   <8 x double> @_sd8xdouble_maskz
 ; GENERIC-LABEL: _sd8xdouble_maskz_load:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: _sd8xdouble_maskz_load:
@@ -8700,7 +8700,7 @@ define <16 x float> @broadcast_ss_spill(
 ; GENERIC-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
 ; GENERIC-NEXT:    callq func_f32
-; GENERIC-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
 ; GENERIC-NEXT:    addq $24, %rsp # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8730,7 +8730,7 @@ define <8 x double> @broadcast_sd_spill(
 ; GENERIC-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
 ; GENERIC-NEXT:    callq func_f64
-; GENERIC-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
 ; GENERIC-NEXT:    addq $24, %rsp # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Fri Apr  6 04:00:51 2018
@@ -202,7 +202,7 @@ define <16 x i16> @test_16xi16_perm_mem_
 ; GENERIC-LABEL: test_16xi16_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi16_perm_mem_mask0:
@@ -219,7 +219,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_mem_mask0:
@@ -240,7 +240,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0:
@@ -261,7 +261,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_mem_mask1:
@@ -282,7 +282,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1:
@@ -303,7 +303,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_mem_mask2:
@@ -324,7 +324,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2:
@@ -344,7 +344,7 @@ define <16 x i16> @test_16xi16_perm_mem_
 ; GENERIC-LABEL: test_16xi16_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi16_perm_mem_mask3:
@@ -361,7 +361,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_mem_mask3:
@@ -382,7 +382,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask3:
@@ -596,7 +596,7 @@ define <32 x i16> @test_32xi16_perm_mem_
 ; GENERIC-LABEL: test_32xi16_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi16_perm_mem_mask0:
@@ -613,7 +613,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_mem_mask0:
@@ -634,7 +634,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0:
@@ -655,7 +655,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_mem_mask1:
@@ -676,7 +676,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1:
@@ -697,7 +697,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_mem_mask2:
@@ -718,7 +718,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2:
@@ -738,7 +738,7 @@ define <32 x i16> @test_32xi16_perm_mem_
 ; GENERIC-LABEL: test_32xi16_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi16_perm_mem_mask3:
@@ -755,7 +755,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_mem_mask3:
@@ -776,7 +776,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3:
@@ -990,7 +990,7 @@ define <8 x i32> @test_8xi32_perm_mem_ma
 ; GENERIC-LABEL: test_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_perm_mem_mask0:
@@ -1007,7 +1007,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi32_perm_mem_mask0:
@@ -1028,7 +1028,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0:
@@ -1049,7 +1049,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi32_perm_mem_mask1:
@@ -1070,7 +1070,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1:
@@ -1091,7 +1091,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi32_perm_mem_mask2:
@@ -1112,7 +1112,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2:
@@ -1132,7 +1132,7 @@ define <8 x i32> @test_8xi32_perm_mem_ma
 ; GENERIC-LABEL: test_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_perm_mem_mask3:
@@ -1149,7 +1149,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi32_perm_mem_mask3:
@@ -1170,7 +1170,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3:
@@ -1384,7 +1384,7 @@ define <16 x i32> @test_16xi32_perm_mem_
 ; GENERIC-LABEL: test_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_perm_mem_mask0:
@@ -1401,7 +1401,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi32_perm_mem_mask0:
@@ -1422,7 +1422,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0:
@@ -1443,7 +1443,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi32_perm_mem_mask1:
@@ -1464,7 +1464,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1:
@@ -1485,7 +1485,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi32_perm_mem_mask2:
@@ -1506,7 +1506,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2:
@@ -1526,7 +1526,7 @@ define <16 x i32> @test_16xi32_perm_mem_
 ; GENERIC-LABEL: test_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_perm_mem_mask3:
@@ -1543,7 +1543,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi32_perm_mem_mask3:
@@ -1564,7 +1564,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3:
@@ -1757,7 +1757,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) {
 ; GENERIC-LABEL: test_4xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_perm_mem_mask0:
@@ -1772,7 +1772,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi64_perm_mem_mask0:
@@ -1791,7 +1791,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask0:
@@ -1810,7 +1810,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi64_perm_mem_mask1:
@@ -1829,7 +1829,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask1:
@@ -1848,7 +1848,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi64_perm_mem_mask2:
@@ -1867,7 +1867,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask2:
@@ -1885,7 +1885,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) {
 ; GENERIC-LABEL: test_4xi64_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_perm_mem_mask3:
@@ -1900,7 +1900,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi64_perm_mem_mask3:
@@ -1919,7 +1919,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask3:
@@ -2293,7 +2293,7 @@ define <8 x i64> @test_8xi64_perm_mem_ma
 ; GENERIC-LABEL: test_8xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [6:0.50]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_perm_mem_mask0:
@@ -2310,7 +2310,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi64_perm_mem_mask0:
@@ -2331,7 +2331,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask0:
@@ -2351,7 +2351,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
@@ -2370,7 +2370,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
@@ -2390,7 +2390,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi64_perm_mem_mask2:
@@ -2411,7 +2411,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask2:
@@ -2430,7 +2430,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) {
 ; GENERIC-LABEL: test_8xi64_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_perm_imm_mem_mask3:
@@ -2445,7 +2445,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
@@ -2464,7 +2464,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
@@ -2484,7 +2484,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi64_perm_mem_mask4:
@@ -2505,7 +2505,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask4:
@@ -2525,7 +2525,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
@@ -2544,7 +2544,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
@@ -2563,7 +2563,7 @@ define <8 x i64> @test_8xi64_perm_mem_ma
 ; GENERIC-LABEL: test_8xi64_perm_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [6:0.50]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_perm_mem_mask6:
@@ -2580,7 +2580,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi64_perm_mem_mask6:
@@ -2601,7 +2601,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask6:
@@ -2621,7 +2621,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
@@ -2640,7 +2640,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
@@ -2853,7 +2853,7 @@ define <8 x float> @test_8xfloat_perm_me
 ; GENERIC-LABEL: test_8xfloat_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_perm_mem_mask0:
@@ -2870,7 +2870,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask0:
@@ -2891,7 +2891,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
@@ -2912,7 +2912,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask1:
@@ -2933,7 +2933,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask1:
@@ -2954,7 +2954,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask2:
@@ -2975,7 +2975,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
@@ -2995,7 +2995,7 @@ define <8 x float> @test_8xfloat_perm_me
 ; GENERIC-LABEL: test_8xfloat_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_perm_mem_mask3:
@@ -3012,7 +3012,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask3:
@@ -3033,7 +3033,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask3:
@@ -3247,7 +3247,7 @@ define <16 x float> @test_16xfloat_perm_
 ; GENERIC-LABEL: test_16xfloat_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_perm_mem_mask0:
@@ -3264,7 +3264,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask0:
@@ -3285,7 +3285,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
@@ -3306,7 +3306,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask1:
@@ -3327,7 +3327,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask1:
@@ -3348,7 +3348,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask2:
@@ -3369,7 +3369,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
@@ -3389,7 +3389,7 @@ define <16 x float> @test_16xfloat_perm_
 ; GENERIC-LABEL: test_16xfloat_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_perm_mem_mask3:
@@ -3406,7 +3406,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask3:
@@ -3427,7 +3427,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask3:
@@ -3620,7 +3620,7 @@ define <4 x double> @test_masked_z_4xdou
 define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) {
 ; GENERIC-LABEL: test_4xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_perm_mem_mask0:
@@ -3635,7 +3635,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask0:
@@ -3654,7 +3654,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
@@ -3673,7 +3673,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask1:
@@ -3692,7 +3692,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
@@ -3711,7 +3711,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask2:
@@ -3730,7 +3730,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
@@ -3748,7 +3748,7 @@ define <4 x double> @test_masked_z_4xdou
 define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) {
 ; GENERIC-LABEL: test_4xdouble_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_perm_mem_mask3:
@@ -3763,7 +3763,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask3:
@@ -3782,7 +3782,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
@@ -4156,7 +4156,7 @@ define <8 x double> @test_8xdouble_perm_
 ; GENERIC-LABEL: test_8xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [6:0.50]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_perm_mem_mask0:
@@ -4173,7 +4173,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask0:
@@ -4194,7 +4194,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
@@ -4214,7 +4214,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
@@ -4233,7 +4233,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
@@ -4253,7 +4253,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask2:
@@ -4274,7 +4274,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
@@ -4293,7 +4293,7 @@ define <8 x double> @test_masked_z_8xdou
 define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) {
 ; GENERIC-LABEL: test_8xdouble_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_perm_imm_mem_mask3:
@@ -4308,7 +4308,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
@@ -4327,7 +4327,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
@@ -4347,7 +4347,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask4:
@@ -4368,7 +4368,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask4:
@@ -4388,7 +4388,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
@@ -4407,7 +4407,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
@@ -4426,7 +4426,7 @@ define <8 x double> @test_8xdouble_perm_
 ; GENERIC-LABEL: test_8xdouble_perm_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [6:0.50]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_perm_mem_mask6:
@@ -4443,7 +4443,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask6:
@@ -4464,7 +4464,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask6:
@@ -4484,7 +4484,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
@@ -4503,7 +4503,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
@@ -4535,7 +4535,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4555,7 +4555,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi8_perm_mask0:
@@ -4572,7 +4572,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4592,7 +4592,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi8_perm_mask1:
@@ -4609,7 +4609,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4629,7 +4629,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi8_perm_mask2:
@@ -4659,7 +4659,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4679,7 +4679,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi8_perm_mask3:
@@ -4713,7 +4713,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi8_perm_mem_mask0:
@@ -4734,7 +4734,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask0:
@@ -4755,7 +4755,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi8_perm_mem_mask1:
@@ -4776,7 +4776,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask1:
@@ -4797,7 +4797,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi8_perm_mem_mask2:
@@ -4818,7 +4818,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask2:
@@ -4855,7 +4855,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi8_perm_mem_mask3:
@@ -4876,7 +4876,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask3:
@@ -4895,7 +4895,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) {
 ; GENERIC-LABEL: test_32xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi8_perm_mask0:
@@ -4909,7 +4909,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4929,7 +4929,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi8_perm_mask0:
@@ -4946,7 +4946,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4966,7 +4966,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi8_perm_mask1:
@@ -4983,7 +4983,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5003,7 +5003,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi8_perm_mask2:
@@ -5019,7 +5019,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) {
 ; GENERIC-LABEL: test_32xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi8_perm_mask3:
@@ -5033,7 +5033,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5053,7 +5053,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi8_perm_mask3:
@@ -5070,7 +5070,7 @@ define <32 x i8> @test_32xi8_perm_mem_ma
 ; GENERIC-LABEL: test_32xi8_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi8_perm_mem_mask0:
@@ -5087,7 +5087,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi8_perm_mem_mask0:
@@ -5108,7 +5108,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask0:
@@ -5129,7 +5129,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi8_perm_mem_mask1:
@@ -5150,7 +5150,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask1:
@@ -5171,7 +5171,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi8_perm_mem_mask2:
@@ -5192,7 +5192,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask2:
@@ -5212,7 +5212,7 @@ define <32 x i8> @test_32xi8_perm_mem_ma
 ; GENERIC-LABEL: test_32xi8_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi8_perm_mem_mask3:
@@ -5229,7 +5229,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi8_perm_mem_mask3:
@@ -5250,7 +5250,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask3:
@@ -5269,7 +5269,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) {
 ; GENERIC-LABEL: test_64xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_64xi8_perm_mask0:
@@ -5283,7 +5283,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5303,7 +5303,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_64xi8_perm_mask0:
@@ -5320,7 +5320,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5340,7 +5340,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_64xi8_perm_mask1:
@@ -5357,7 +5357,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5377,7 +5377,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_64xi8_perm_mask2:
@@ -5393,7 +5393,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) {
 ; GENERIC-LABEL: test_64xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_64xi8_perm_mask3:
@@ -5407,7 +5407,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5427,7 +5427,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_64xi8_perm_mask3:
@@ -5444,7 +5444,7 @@ define <64 x i8> @test_64xi8_perm_mem_ma
 ; GENERIC-LABEL: test_64xi8_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 # sched: [6:0.50]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_64xi8_perm_mem_mask0:
@@ -5461,7 +5461,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_64xi8_perm_mem_mask0:
@@ -5482,7 +5482,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0:
@@ -5503,7 +5503,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_64xi8_perm_mem_mask1:
@@ -5524,7 +5524,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1:
@@ -5545,7 +5545,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_64xi8_perm_mem_mask2:
@@ -5566,7 +5566,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2:
@@ -5586,7 +5586,7 @@ define <64 x i8> @test_64xi8_perm_mem_ma
 ; GENERIC-LABEL: test_64xi8_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 # sched: [6:0.50]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_64xi8_perm_mem_mask3:
@@ -5603,7 +5603,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_64xi8_perm_mem_mask3:
@@ -5624,7 +5624,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3:
@@ -5993,7 +5993,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0:
@@ -6012,7 +6012,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
@@ -6031,7 +6031,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1:
@@ -6050,7 +6050,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
@@ -6069,7 +6069,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2:
@@ -6088,7 +6088,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
@@ -6121,7 +6121,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3:
@@ -6140,7 +6140,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
@@ -6159,7 +6159,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4:
@@ -6178,7 +6178,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
@@ -6197,7 +6197,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5:
@@ -6216,7 +6216,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
@@ -6249,7 +6249,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6:
@@ -6268,7 +6268,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
@@ -6287,7 +6287,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7:
@@ -6306,7 +6306,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
@@ -6659,7 +6659,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) {
 ; GENERIC-LABEL: test_16xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi16_perm_high_mem_mask0:
@@ -6674,7 +6674,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask0:
@@ -6693,7 +6693,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask0:
@@ -6712,7 +6712,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask1:
@@ -6731,7 +6731,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask1:
@@ -6750,7 +6750,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask2:
@@ -6769,7 +6769,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask2:
@@ -6787,7 +6787,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) {
 ; GENERIC-LABEL: test_16xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi16_perm_low_mem_mask3:
@@ -6802,7 +6802,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask3:
@@ -6821,7 +6821,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask3:
@@ -6840,7 +6840,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask4:
@@ -6859,7 +6859,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask4:
@@ -6878,7 +6878,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask5:
@@ -6897,7 +6897,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask5:
@@ -6915,7 +6915,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) {
 ; GENERIC-LABEL: test_16xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi16_perm_high_mem_mask6:
@@ -6930,7 +6930,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask6:
@@ -6949,7 +6949,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask6:
@@ -6968,7 +6968,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask7:
@@ -6987,7 +6987,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask7:
@@ -7340,7 +7340,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) {
 ; GENERIC-LABEL: test_32xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi16_perm_high_mem_mask0:
@@ -7355,7 +7355,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask0:
@@ -7374,7 +7374,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
@@ -7393,7 +7393,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask1:
@@ -7412,7 +7412,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
@@ -7431,7 +7431,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask2:
@@ -7450,7 +7450,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
@@ -7468,7 +7468,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) {
 ; GENERIC-LABEL: test_32xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi16_perm_low_mem_mask3:
@@ -7483,7 +7483,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask3:
@@ -7502,7 +7502,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
@@ -7521,7 +7521,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask4:
@@ -7540,7 +7540,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
@@ -7558,7 +7558,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7579,7 +7579,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7600,7 +7600,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) {
 ; GENERIC-LABEL: test_32xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_32xi16_perm_high_mem_mask6:
@@ -7615,7 +7615,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask6:
@@ -7634,7 +7634,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
@@ -7653,7 +7653,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask7:
@@ -7672,7 +7672,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [5:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
@@ -7879,7 +7879,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask0:
@@ -7898,7 +7898,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0:
@@ -7917,7 +7917,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask1:
@@ -7936,7 +7936,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1:
@@ -7955,7 +7955,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask2:
@@ -7974,7 +7974,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2:
@@ -8007,7 +8007,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask3:
@@ -8026,7 +8026,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3:
@@ -8233,7 +8233,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask0:
@@ -8252,7 +8252,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask0:
@@ -8271,7 +8271,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask1:
@@ -8290,7 +8290,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask1:
@@ -8309,7 +8309,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask2:
@@ -8328,7 +8328,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask2:
@@ -8361,7 +8361,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask3:
@@ -8380,7 +8380,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask3:
@@ -8572,7 +8572,7 @@ define <16 x i32> @test2_masked_z_16xi32
 define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
 ; GENERIC-LABEL: test2_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_16xi32_perm_mem_mask0:
@@ -8587,7 +8587,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask0:
@@ -8606,7 +8606,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask0:
@@ -8625,7 +8625,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask1:
@@ -8644,7 +8644,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask1:
@@ -8663,7 +8663,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask2:
@@ -8682,7 +8682,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask2:
@@ -8700,7 +8700,7 @@ define <16 x i32> @test2_masked_z_16xi32
 define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
 ; GENERIC-LABEL: test2_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [5:1.00]
+; GENERIC-NEXT:    vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_16xi32_perm_mem_mask3:
@@ -8715,7 +8715,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask3:
@@ -8734,7 +8734,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [5:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask3:
@@ -8941,7 +8941,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8962,7 +8962,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
@@ -8981,7 +8981,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9002,7 +9002,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
@@ -9021,7 +9021,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9042,7 +9042,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
@@ -9075,7 +9075,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9096,7 +9096,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
@@ -9288,7 +9288,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
 ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_shuff_mem_mask0:
@@ -9303,7 +9303,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9324,7 +9324,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
@@ -9343,7 +9343,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9364,7 +9364,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
@@ -9383,7 +9383,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9404,7 +9404,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
@@ -9422,7 +9422,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
 ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_shuff_mem_mask3:
@@ -9437,7 +9437,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9458,7 +9458,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
@@ -9665,7 +9665,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9686,7 +9686,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
@@ -9705,7 +9705,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9726,7 +9726,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
@@ -9745,7 +9745,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9766,7 +9766,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
@@ -9799,7 +9799,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9820,7 +9820,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
@@ -10012,7 +10012,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
 ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_shuff_mem_mask0:
@@ -10027,7 +10027,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10048,7 +10048,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
@@ -10067,7 +10067,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10088,7 +10088,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
@@ -10107,7 +10107,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10128,7 +10128,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
@@ -10146,7 +10146,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
 ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_shuff_mem_mask3:
@@ -10161,7 +10161,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10182,7 +10182,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
@@ -10374,7 +10374,7 @@ define <8 x i32> @test_8xi32_zero_masked
 define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) {
 ; GENERIC-LABEL: test_8xi32_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_shuff_mem_mask0:
@@ -10389,7 +10389,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10410,7 +10410,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
@@ -10429,7 +10429,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10450,7 +10450,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
@@ -10469,7 +10469,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10490,7 +10490,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
@@ -10508,7 +10508,7 @@ define <8 x i32> @test_8xi32_zero_masked
 define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) {
 ; GENERIC-LABEL: test_8xi32_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_shuff_mem_mask3:
@@ -10523,7 +10523,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10544,7 +10544,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
@@ -10736,7 +10736,7 @@ define <16 x i32> @test_16xi32_zero_mask
 define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) {
 ; GENERIC-LABEL: test_16xi32_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_shuff_mem_mask0:
@@ -10751,7 +10751,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10772,7 +10772,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
@@ -10791,7 +10791,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10812,7 +10812,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
@@ -10831,7 +10831,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10852,7 +10852,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
@@ -10870,7 +10870,7 @@ define <16 x i32> @test_16xi32_zero_mask
 define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) {
 ; GENERIC-LABEL: test_16xi32_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_shuff_mem_mask3:
@@ -10885,7 +10885,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10906,7 +10906,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
@@ -11098,7 +11098,7 @@ define <4 x i64> @test_4xi64_zero_masked
 define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) {
 ; GENERIC-LABEL: test_4xi64_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_shuff_mem_mask0:
@@ -11113,7 +11113,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11134,7 +11134,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
@@ -11153,7 +11153,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11174,7 +11174,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
@@ -11193,7 +11193,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11214,7 +11214,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
@@ -11232,7 +11232,7 @@ define <4 x i64> @test_4xi64_zero_masked
 define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) {
 ; GENERIC-LABEL: test_4xi64_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_shuff_mem_mask3:
@@ -11247,7 +11247,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11268,7 +11268,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
@@ -11460,7 +11460,7 @@ define <8 x i64> @test_8xi64_zero_masked
 define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) {
 ; GENERIC-LABEL: test_8xi64_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_shuff_mem_mask0:
@@ -11475,7 +11475,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11496,7 +11496,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
@@ -11515,7 +11515,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11536,7 +11536,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
@@ -11555,7 +11555,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11576,7 +11576,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
@@ -11594,7 +11594,7 @@ define <8 x i64> @test_8xi64_zero_masked
 define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) {
 ; GENERIC-LABEL: test_8xi64_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_shuff_mem_mask3:
@@ -11609,7 +11609,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11630,7 +11630,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
@@ -11837,7 +11837,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11858,7 +11858,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
@@ -11877,7 +11877,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11898,7 +11898,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
@@ -11917,7 +11917,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11938,7 +11938,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
@@ -11971,7 +11971,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11992,7 +11992,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
@@ -12199,7 +12199,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12220,7 +12220,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
@@ -12239,7 +12239,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12260,7 +12260,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
@@ -12279,7 +12279,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12300,7 +12300,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
@@ -12333,7 +12333,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12354,7 +12354,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
@@ -12546,7 +12546,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
 ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0:
@@ -12561,7 +12561,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12582,7 +12582,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
@@ -12601,7 +12601,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12622,7 +12622,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
@@ -12641,7 +12641,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12662,7 +12662,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
@@ -12680,7 +12680,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
 ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3:
@@ -12695,7 +12695,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12716,7 +12716,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
@@ -12836,7 +12836,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12857,7 +12857,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
@@ -12876,7 +12876,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12897,7 +12897,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
@@ -13104,7 +13104,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13125,7 +13125,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
@@ -13144,7 +13144,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13165,7 +13165,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
@@ -13184,7 +13184,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13205,7 +13205,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
@@ -13238,7 +13238,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13259,7 +13259,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
@@ -13451,7 +13451,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
 ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0:
@@ -13466,7 +13466,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13487,7 +13487,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
@@ -13506,7 +13506,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13527,7 +13527,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
@@ -13546,7 +13546,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13567,7 +13567,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
@@ -13585,7 +13585,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
 ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3:
@@ -13600,7 +13600,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13621,7 +13621,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
+; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
@@ -13828,7 +13828,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13849,7 +13849,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
@@ -13868,7 +13868,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13889,7 +13889,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
@@ -13908,7 +13908,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13929,7 +13929,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
@@ -13962,7 +13962,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13983,7 +13983,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
@@ -14190,7 +14190,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14211,7 +14211,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
@@ -14230,7 +14230,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14251,7 +14251,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
@@ -14270,7 +14270,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14291,7 +14291,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
@@ -14324,7 +14324,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14345,7 +14345,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
@@ -14537,7 +14537,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
 ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0:
@@ -14552,7 +14552,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14573,7 +14573,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
@@ -14592,7 +14592,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14613,7 +14613,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
@@ -14632,7 +14632,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14653,7 +14653,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
@@ -14671,7 +14671,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
 ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3:
@@ -14686,7 +14686,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14707,7 +14707,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
@@ -14827,7 +14827,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14848,7 +14848,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
@@ -14867,7 +14867,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14888,7 +14888,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
@@ -15095,7 +15095,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15116,7 +15116,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
@@ -15135,7 +15135,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15156,7 +15156,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
@@ -15175,7 +15175,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15196,7 +15196,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
@@ -15229,7 +15229,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15250,7 +15250,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
@@ -15442,7 +15442,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
 ; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0:
@@ -15457,7 +15457,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15478,7 +15478,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
@@ -15497,7 +15497,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15518,7 +15518,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
@@ -15537,7 +15537,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15558,7 +15558,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
@@ -15576,7 +15576,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
 ; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3:
@@ -15591,7 +15591,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15612,7 +15612,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:

Modified: llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll Fri Apr  6 04:00:51 2018
@@ -10,12 +10,12 @@ define void @test_vpopcntd(<16 x i32> %a
 ; GENERIC-NEXT:    vpopcntd %zmm1, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpopcntd %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
-; GENERIC-NEXT:    vpopcntd (%rdi), %zmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntd (%rdi), %zmm0 {%k1} # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [7:1.00]
+; GENERIC-NEXT:    vpopcntd (%rdi), %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -48,12 +48,12 @@ define void @test_vpopcntq(<8 x i64> %a0
 ; GENERIC-NEXT:    vpopcntq %zmm1, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpopcntq %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
-; GENERIC-NEXT:    vpopcntq (%rdi), %zmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntq (%rdi), %zmm0 {%k1} # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [7:1.00]
-; GENERIC-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [7:1.00]
+; GENERIC-NEXT:    vpopcntq (%rdi), %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00]
+; GENERIC-NEXT:    vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]

Modified: llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi-schedule.ll Fri Apr  6 04:00:51 2018
@@ -73,7 +73,7 @@ define i32 @test_andn_i32(i32 %a0, i32 %
 ; GENERIC-LABEL: test_andn_i32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    andnl (%rdx), %edi, %eax # sched: [5:0.50]
+; GENERIC-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -123,7 +123,7 @@ define i64 @test_andn_i64(i64 %a0, i64 %
 ; GENERIC-LABEL: test_andn_i64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    andnq (%rdx), %rdi, %rax # sched: [5:0.50]
+; GENERIC-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -172,7 +172,7 @@ define i64 @test_andn_i64(i64 %a0, i64 %
 define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_bextr_i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [6:1.00]
+; GENERIC-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:1.00]
 ; GENERIC-NEXT:    bextrl %edi, %esi, %eax # sched: [2:1.00]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -222,7 +222,7 @@ declare i32 @llvm.x86.bmi.bextr.32(i32,
 define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_bextr_i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [6:1.00]
+; GENERIC-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:1.00]
 ; GENERIC-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -272,7 +272,7 @@ declare i64 @llvm.x86.bmi.bextr.64(i64,
 define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_blsi_i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsil (%rsi), %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
 ; GENERIC-NEXT:    blsil %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -323,7 +323,7 @@ define i32 @test_blsi_i32(i32 %a0, i32 *
 define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
 ; GENERIC-LABEL: test_blsi_i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsiq (%rsi), %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
 ; GENERIC-NEXT:    blsiq %rdi, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -374,7 +374,7 @@ define i64 @test_blsi_i64(i64 %a0, i64 *
 define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_blsmsk_i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsmskl (%rsi), %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
 ; GENERIC-NEXT:    blsmskl %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -425,7 +425,7 @@ define i32 @test_blsmsk_i32(i32 %a0, i32
 define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
 ; GENERIC-LABEL: test_blsmsk_i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsmskq (%rsi), %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
 ; GENERIC-NEXT:    blsmskq %rdi, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -476,7 +476,7 @@ define i64 @test_blsmsk_i64(i64 %a0, i64
 define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_blsr_i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsrl (%rsi), %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
 ; GENERIC-NEXT:    blsrl %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -527,7 +527,7 @@ define i32 @test_blsr_i32(i32 %a0, i32 *
 define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
 ; GENERIC-LABEL: test_blsr_i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsrq (%rsi), %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
 ; GENERIC-NEXT:    blsrq %rdi, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]

Modified: llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll Fri Apr  6 04:00:51 2018
@@ -9,7 +9,7 @@
 define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_bzhi_i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bzhil %edi, (%rdx), %ecx # sched: [5:1.00]
+; GENERIC-NEXT:    bzhil %edi, (%rdx), %ecx # sched: [6:1.00]
 ; GENERIC-NEXT:    bzhil %edi, %esi, %eax # sched: [1:1.00]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -59,7 +59,7 @@ declare i32 @llvm.x86.bmi.bzhi.32(i32, i
 define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_bzhi_i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bzhiq %rdi, (%rdx), %rcx # sched: [5:1.00]
+; GENERIC-NEXT:    bzhiq %rdi, (%rdx), %rcx # sched: [6:1.00]
 ; GENERIC-NEXT:    bzhiq %rdi, %rsi, %rax # sched: [1:1.00]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -111,7 +111,7 @@ define void @test_mulx_i32(i32 %a0, i32
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    mulxl %esi, %esi, %edi # sched: [3:1.00]
-; GENERIC-NEXT:    mulxl (%rdx), %esi, %edi # sched: [7:1.00]
+; GENERIC-NEXT:    mulxl (%rdx), %esi, %edi # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -164,7 +164,7 @@ define i64 @test_mulx_i64(i64 %a0, i64 %
 ; GENERIC-NEXT:    movq %rdx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %rdi, %rdx # sched: [1:0.33]
 ; GENERIC-NEXT:    mulxq %rsi, %rsi, %rcx # sched: [3:1.00]
-; GENERIC-NEXT:    mulxq (%rax), %rdx, %rax # sched: [7:1.00]
+; GENERIC-NEXT:    mulxq (%rax), %rdx, %rax # sched: [8:1.00]
 ; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -229,7 +229,7 @@ define i64 @test_mulx_i64(i64 %a0, i64 %
 define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_pdep_i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pdepl (%rdx), %edi, %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    pdepl (%rdx), %edi, %ecx # sched: [6:0.50]
 ; GENERIC-NEXT:    pdepl %esi, %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -279,7 +279,7 @@ declare i32 @llvm.x86.bmi.pdep.32(i32, i
 define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_pdep_i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pdepq (%rdx), %rdi, %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    pdepq (%rdx), %rdi, %rcx # sched: [6:0.50]
 ; GENERIC-NEXT:    pdepq %rsi, %rdi, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -329,7 +329,7 @@ declare i64 @llvm.x86.bmi.pdep.64(i64, i
 define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_pext_i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pextl (%rdx), %edi, %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    pextl (%rdx), %edi, %ecx # sched: [6:0.50]
 ; GENERIC-NEXT:    pextl %esi, %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -379,7 +379,7 @@ declare i32 @llvm.x86.bmi.pext.32(i32, i
 define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_pext_i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pextq (%rdx), %rdi, %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    pextq (%rdx), %rdi, %rcx # sched: [6:0.50]
 ; GENERIC-NEXT:    pextq %rsi, %rdi, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -430,7 +430,7 @@ define i32 @test_rorx_i32(i32 %a0, i32 %
 ; GENERIC-LABEL: test_rorx_i32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    rorxl $5, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT:    rorxl $5, (%rdx), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    rorxl $5, (%rdx), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -483,7 +483,7 @@ define i64 @test_rorx_i64(i64 %a0, i64 %
 ; GENERIC-LABEL: test_rorx_i64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    rorxq $5, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT:    rorxq $5, (%rdx), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    rorxq $5, (%rdx), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -536,7 +536,7 @@ define i32 @test_sarx_i32(i32 %a0, i32 %
 ; GENERIC-LABEL: test_sarx_i32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sarxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT:    sarxl %esi, (%rdx), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    sarxl %esi, (%rdx), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -585,7 +585,7 @@ define i64 @test_sarx_i64(i64 %a0, i64 %
 ; GENERIC-LABEL: test_sarx_i64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT:    sarxq %rsi, (%rdx), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    sarxq %rsi, (%rdx), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -634,7 +634,7 @@ define i32 @test_shlx_i32(i32 %a0, i32 %
 ; GENERIC-LABEL: test_shlx_i32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    shlxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT:    shlxl %esi, (%rdx), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    shlxl %esi, (%rdx), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -683,7 +683,7 @@ define i64 @test_shlx_i64(i64 %a0, i64 %
 ; GENERIC-LABEL: test_shlx_i64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT:    shlxq %rsi, (%rdx), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    shlxq %rsi, (%rdx), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -732,7 +732,7 @@ define i32 @test_shrx_i32(i32 %a0, i32 %
 ; GENERIC-LABEL: test_shrx_i32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    shrxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT:    shrxl %esi, (%rdx), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    shrxl %esi, (%rdx), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -781,7 +781,7 @@ define i64 @test_shrx_i64(i64 %a0, i64 %
 ; GENERIC-LABEL: test_shrx_i64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT:    shrxq %rsi, (%rdx), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    shrxq %rsi, (%rdx), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/clwb-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clwb-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clwb-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clwb-schedule.ll Fri Apr  6 04:00:51 2018
@@ -5,7 +5,7 @@
 define void @clwb(i8* %a0) nounwind {
 ; GENERIC-LABEL: clwb:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    clwb (%rdi) # sched: [4:0.50]
+; GENERIC-NEXT:    clwb (%rdi) # sched: [5:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: clwb:

Modified: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll Fri Apr  6 04:00:51 2018
@@ -10,14 +10,14 @@
 define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
 ; GENERIC-LABEL: test_vcvtph2ps_128:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
 ; GENERIC-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtph2ps_128:
 ; IVY:       # %bb.0:
-; IVY-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
+; IVY-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
 ; IVY-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; IVY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; IVY-NEXT:    retq # sched: [1:1.00]
@@ -67,14 +67,14 @@ declare <4 x float> @llvm.x86.vcvtph2ps.
 define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
 ; GENERIC-LABEL: test_vcvtph2ps_256:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
 ; GENERIC-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtph2ps_256:
 ; IVY:       # %bb.0:
-; IVY-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
+; IVY-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
 ; IVY-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
 ; IVY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; IVY-NEXT:    retq # sched: [1:1.00]
@@ -125,13 +125,13 @@ define <8 x i16> @test_vcvtps2ph_128(<4
 ; GENERIC-LABEL: test_vcvtps2ph_128:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00]
+; GENERIC-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtps2ph_128:
 ; IVY:       # %bb.0:
 ; IVY-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
-; IVY-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00]
+; IVY-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
 ; IVY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_vcvtps2ph_128:
@@ -175,15 +175,15 @@ define <8 x i16> @test_vcvtps2ph_256(<8
 ; GENERIC-LABEL: test_vcvtps2ph_256:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
-; GENERIC-NEXT:    vzeroupper
+; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
+; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtps2ph_256:
 ; IVY:       # %bb.0:
 ; IVY-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
-; IVY-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
-; IVY-NEXT:    vzeroupper
+; IVY-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
+; IVY-NEXT:    vzeroupper # sched: [100:0.33]
 ; IVY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_vcvtps2ph_256:

Modified: llvm/trunk/test/CodeGen/X86/fma-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-schedule.ll Fri Apr  6 04:00:51 2018
@@ -18,9 +18,9 @@ define void @test_vfmaddpd_128(<2 x doub
 ; GENERIC-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -106,9 +106,9 @@ define void @test_vfmaddpd_256(<4 x doub
 ; GENERIC-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -200,9 +200,9 @@ define void @test_vfmaddps_128(<4 x floa
 ; GENERIC-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -288,9 +288,9 @@ define void @test_vfmaddps_256(<8 x floa
 ; GENERIC-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -382,9 +382,9 @@ define void @test_vfmaddsd_128(<2 x doub
 ; GENERIC-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -470,9 +470,9 @@ define void @test_vfmaddss_128(<4 x floa
 ; GENERIC-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -562,9 +562,9 @@ define void @test_vfmaddsubpd_128(<2 x d
 ; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -650,9 +650,9 @@ define void @test_vfmaddsubpd_256(<4 x d
 ; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -744,9 +744,9 @@ define void @test_vfmaddsubps_128(<4 x f
 ; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -832,9 +832,9 @@ define void @test_vfmaddsubps_256(<8 x f
 ; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -930,9 +930,9 @@ define void @test_vfmsubaddpd_128(<2 x d
 ; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1018,9 +1018,9 @@ define void @test_vfmsubaddpd_256(<4 x d
 ; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1112,9 +1112,9 @@ define void @test_vfmsubaddps_128(<4 x f
 ; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1200,9 +1200,9 @@ define void @test_vfmsubaddps_256(<8 x f
 ; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1298,9 +1298,9 @@ define void @test_vfmsubpd_128(<2 x doub
 ; GENERIC-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1386,9 +1386,9 @@ define void @test_vfmsubpd_256(<4 x doub
 ; GENERIC-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1480,9 +1480,9 @@ define void @test_vfmsubps_128(<4 x floa
 ; GENERIC-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1568,9 +1568,9 @@ define void @test_vfmsubps_256(<8 x floa
 ; GENERIC-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1662,9 +1662,9 @@ define void @test_vfmsubsd_128(<2 x doub
 ; GENERIC-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1750,9 +1750,9 @@ define void @test_vfmsubss_128(<4 x floa
 ; GENERIC-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1842,9 +1842,9 @@ define void @test_vfnmaddpd_128(<2 x dou
 ; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1930,9 +1930,9 @@ define void @test_vfnmaddpd_256(<4 x dou
 ; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2024,9 +2024,9 @@ define void @test_vfnmaddps_128(<4 x flo
 ; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2112,9 +2112,9 @@ define void @test_vfnmaddps_256(<8 x flo
 ; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2206,9 +2206,9 @@ define void @test_vfnmaddsd_128(<2 x dou
 ; GENERIC-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2294,9 +2294,9 @@ define void @test_vfnmaddss_128(<4 x flo
 ; GENERIC-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2386,9 +2386,9 @@ define void @test_vfnmsubpd_128(<2 x dou
 ; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2474,9 +2474,9 @@ define void @test_vfnmsubpd_256(<4 x dou
 ; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2568,9 +2568,9 @@ define void @test_vfnmsubps_128(<4 x flo
 ; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2656,9 +2656,9 @@ define void @test_vfnmsubps_256(<8 x flo
 ; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2750,9 +2750,9 @@ define void @test_vfnmsubsd_128(<2 x dou
 ; GENERIC-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2838,9 +2838,9 @@ define void @test_vfnmsubss_128(<4 x flo
 ; GENERIC-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
 ; GENERIC-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/fma4-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma4-schedule.ll Fri Apr  6 04:00:51 2018
@@ -14,8 +14,8 @@ define void @test_vfmaddpd_128(<2 x doub
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -36,8 +36,8 @@ define void @test_vfmaddpd_256(<4 x doub
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -60,8 +60,8 @@ define void @test_vfmaddps_128(<4 x floa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -82,8 +82,8 @@ define void @test_vfmaddps_256(<8 x floa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -106,8 +106,8 @@ define void @test_vfmaddsd_128(<2 x doub
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -128,8 +128,8 @@ define void @test_vfmaddss_128(<4 x floa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -154,8 +154,8 @@ define void @test_vfmaddsubpd_128(<2 x d
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -176,8 +176,8 @@ define void @test_vfmaddsubpd_256(<4 x d
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -200,8 +200,8 @@ define void @test_vfmaddsubps_128(<4 x f
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -222,8 +222,8 @@ define void @test_vfmaddsubps_256(<8 x f
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -250,8 +250,8 @@ define void @test_vfmsubaddpd_128(<2 x d
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -272,8 +272,8 @@ define void @test_vfmsubaddpd_256(<4 x d
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -296,8 +296,8 @@ define void @test_vfmsubaddps_128(<4 x f
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -318,8 +318,8 @@ define void @test_vfmsubaddps_256(<8 x f
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -346,8 +346,8 @@ define void @test_vfmsubpd_128(<2 x doub
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -368,8 +368,8 @@ define void @test_vfmsubpd_256(<4 x doub
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -392,8 +392,8 @@ define void @test_vfmsubps_128(<4 x floa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -414,8 +414,8 @@ define void @test_vfmsubps_256(<8 x floa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -438,8 +438,8 @@ define void @test_vfmsubsd_128(<2 x doub
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -460,8 +460,8 @@ define void @test_vfmsubss_128(<4 x floa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -486,8 +486,8 @@ define void @test_vfnmaddpd_128(<2 x dou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -508,8 +508,8 @@ define void @test_vfnmaddpd_256(<4 x dou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -532,8 +532,8 @@ define void @test_vfnmaddps_128(<4 x flo
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -554,8 +554,8 @@ define void @test_vfnmaddps_256(<8 x flo
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -578,8 +578,8 @@ define void @test_vfnmaddsd_128(<2 x dou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -600,8 +600,8 @@ define void @test_vfnmaddss_128(<4 x flo
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -626,8 +626,8 @@ define void @test_vfnmsubpd_128(<2 x dou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -648,8 +648,8 @@ define void @test_vfnmsubpd_256(<4 x dou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -672,8 +672,8 @@ define void @test_vfnmsubps_128(<4 x flo
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -694,8 +694,8 @@ define void @test_vfnmsubps_256(<8 x flo
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -718,8 +718,8 @@ define void @test_vfnmsubsd_128(<2 x dou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -740,8 +740,8 @@ define void @test_vfnmsubss_128(<4 x flo
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT:    vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50]
-; GENERIC-NEXT:    vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
+; GENERIC-NEXT:    vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; GENERIC-NEXT:    vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll Fri Apr  6 04:00:51 2018
@@ -637,7 +637,7 @@ define i32 @test_movd(x86_mmx %a0, i32 %
 ; GENERIC-LABEL: test_movd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    movd %edi, %mm1 # sched: [1:0.33]
-; GENERIC-NEXT:    movd (%rsi), %mm2 # sched: [4:0.50]
+; GENERIC-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
 ; GENERIC-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    movd %mm2, %ecx # sched: [1:0.33]
@@ -670,7 +670,7 @@ define i32 @test_movd(x86_mmx %a0, i32 %
 ; SANDY-LABEL: test_movd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    movd %edi, %mm1 # sched: [1:0.33]
-; SANDY-NEXT:    movd (%rsi), %mm2 # sched: [4:0.50]
+; SANDY-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
 ; SANDY-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
 ; SANDY-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    movd %mm2, %ecx # sched: [1:0.33]
@@ -893,7 +893,7 @@ declare void @llvm.x86.mmx.movnt.dq(x86_
 define void @test_movq(i64 *%a0) {
 ; GENERIC-LABEL: test_movq:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movq (%rdi), %mm0 # sched: [4:0.50]
+; GENERIC-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
 ; GENERIC-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -916,7 +916,7 @@ define void @test_movq(i64 *%a0) {
 ;
 ; SANDY-LABEL: test_movq:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    movq (%rdi), %mm0 # sched: [4:0.50]
+; SANDY-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
 ; SANDY-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -1263,7 +1263,7 @@ define i64 @test_packssdw(x86_mmx %a0, x
 ; GENERIC-LABEL: test_packssdw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    packssdw (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1284,7 +1284,7 @@ define i64 @test_packssdw(x86_mmx %a0, x
 ; SANDY-LABEL: test_packssdw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    packssdw (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1341,7 +1341,7 @@ define i64 @test_packsswb(x86_mmx %a0, x
 ; GENERIC-LABEL: test_packsswb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    packsswb (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1362,7 +1362,7 @@ define i64 @test_packsswb(x86_mmx %a0, x
 ; SANDY-LABEL: test_packsswb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    packsswb (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1419,7 +1419,7 @@ define i64 @test_packuswb(x86_mmx %a0, x
 ; GENERIC-LABEL: test_packuswb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    packuswb (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1440,7 +1440,7 @@ define i64 @test_packuswb(x86_mmx %a0, x
 ; SANDY-LABEL: test_packuswb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    packuswb (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1497,7 +1497,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_paddb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    paddb (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    paddb (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1518,7 +1518,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_paddb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    paddb (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    paddb (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1575,7 +1575,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_paddd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    paddd (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    paddd (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1596,7 +1596,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_paddd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    paddd (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    paddd (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1731,7 +1731,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86
 ; GENERIC-LABEL: test_paddsb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    paddsb (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    paddsb (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1752,7 +1752,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86
 ; SANDY-LABEL: test_paddsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    paddsb (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    paddsb (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1809,7 +1809,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86
 ; GENERIC-LABEL: test_paddsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    paddsw (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    paddsw (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1830,7 +1830,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86
 ; SANDY-LABEL: test_paddsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    paddsw (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    paddsw (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1887,7 +1887,7 @@ define i64 @test_paddusb(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_paddusb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    paddusb (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    paddusb (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1908,7 +1908,7 @@ define i64 @test_paddusb(x86_mmx %a0, x8
 ; SANDY-LABEL: test_paddusb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    paddusb (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    paddusb (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1965,7 +1965,7 @@ define i64 @test_paddusw(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_paddusw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    paddusw (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    paddusw (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1986,7 +1986,7 @@ define i64 @test_paddusw(x86_mmx %a0, x8
 ; SANDY-LABEL: test_paddusw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    paddusw (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    paddusw (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2043,7 +2043,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_paddw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    paddw (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    paddw (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2064,7 +2064,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_paddw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    paddw (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    paddw (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2199,7 +2199,7 @@ define i64 @test_pand(x86_mmx %a0, x86_m
 ; GENERIC-LABEL: test_pand:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pand %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    pand (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    pand (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2220,7 +2220,7 @@ define i64 @test_pand(x86_mmx %a0, x86_m
 ; SANDY-LABEL: test_pand:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pand %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    pand (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    pand (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2277,7 +2277,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_pandn:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pandn %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    pandn (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    pandn (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2298,7 +2298,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_pandn:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pandn %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    pandn (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    pandn (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2355,7 +2355,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_pavgb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pavgb %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pavgb (%rdi), %mm0 # sched: [9:1.00]
+; GENERIC-NEXT:    pavgb (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2376,7 +2376,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_pavgb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pavgb %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    pavgb (%rdi), %mm0 # sched: [9:1.00]
+; SANDY-NEXT:    pavgb (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2433,7 +2433,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_pavgw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pavgw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pavgw (%rdi), %mm0 # sched: [9:1.00]
+; GENERIC-NEXT:    pavgw (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2454,7 +2454,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_pavgw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pavgw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    pavgw (%rdi), %mm0 # sched: [9:1.00]
+; SANDY-NEXT:    pavgw (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2511,7 +2511,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_pcmpeqb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2532,7 +2532,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x8
 ; SANDY-LABEL: test_pcmpeqb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2589,7 +2589,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_pcmpeqd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2610,7 +2610,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x8
 ; SANDY-LABEL: test_pcmpeqd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2667,7 +2667,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_pcmpeqw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2688,7 +2688,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x8
 ; SANDY-LABEL: test_pcmpeqw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2745,7 +2745,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_pcmpgtb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2766,7 +2766,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x8
 ; SANDY-LABEL: test_pcmpgtb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2823,7 +2823,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_pcmpgtd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2844,7 +2844,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x8
 ; SANDY-LABEL: test_pcmpgtd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2901,7 +2901,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_pcmpgtw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2922,7 +2922,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x8
 ; SANDY-LABEL: test_pcmpgtw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3747,7 +3747,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86
 ; GENERIC-LABEL: test_pmaxsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaxsw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pmaxsw (%rdi), %mm0 # sched: [9:1.00]
+; GENERIC-NEXT:    pmaxsw (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3768,7 +3768,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86
 ; SANDY-LABEL: test_pmaxsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaxsw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    pmaxsw (%rdi), %mm0 # sched: [9:1.00]
+; SANDY-NEXT:    pmaxsw (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3825,7 +3825,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86
 ; GENERIC-LABEL: test_pmaxub:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaxub %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pmaxub (%rdi), %mm0 # sched: [9:1.00]
+; GENERIC-NEXT:    pmaxub (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3846,7 +3846,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86
 ; SANDY-LABEL: test_pmaxub:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaxub %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    pmaxub (%rdi), %mm0 # sched: [9:1.00]
+; SANDY-NEXT:    pmaxub (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3903,7 +3903,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86
 ; GENERIC-LABEL: test_pminsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pminsw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pminsw (%rdi), %mm0 # sched: [9:1.00]
+; GENERIC-NEXT:    pminsw (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3924,7 +3924,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86
 ; SANDY-LABEL: test_pminsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pminsw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    pminsw (%rdi), %mm0 # sched: [9:1.00]
+; SANDY-NEXT:    pminsw (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3981,7 +3981,7 @@ define i64 @test_pminub(x86_mmx %a0, x86
 ; GENERIC-LABEL: test_pminub:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pminub %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    pminub (%rdi), %mm0 # sched: [9:1.00]
+; GENERIC-NEXT:    pminub (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4002,7 +4002,7 @@ define i64 @test_pminub(x86_mmx %a0, x86
 ; SANDY-LABEL: test_pminub:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pminub %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    pminub (%rdi), %mm0 # sched: [9:1.00]
+; SANDY-NEXT:    pminub (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4504,7 +4504,7 @@ define i64 @test_por(x86_mmx %a0, x86_mm
 ; GENERIC-LABEL: test_por:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    por (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    por (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4525,7 +4525,7 @@ define i64 @test_por(x86_mmx %a0, x86_mm
 ; SANDY-LABEL: test_por:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    por (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    por (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4737,7 +4737,7 @@ declare x86_mmx @llvm.x86.ssse3.pshuf.b(
 define i64 @test_pshufw(x86_mmx *%a0) optsize {
 ; GENERIC-LABEL: test_pshufw:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [5:1.00]
+; GENERIC-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; GENERIC-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4758,7 +4758,7 @@ define i64 @test_pshufw(x86_mmx *%a0) op
 ;
 ; SANDY-LABEL: test_pshufw:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [5:1.00]
+; SANDY-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; SANDY-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5050,7 +5050,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_pslld:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    pslld (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5074,7 +5074,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_pslld:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    pslld (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5140,7 +5140,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psllq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    psllq (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5164,7 +5164,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psllq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    psllq (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5230,7 +5230,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psllw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    psllw (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5254,7 +5254,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psllw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    psllw (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5320,7 +5320,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psrad:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    psrad (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5344,7 +5344,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psrad:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    psrad (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5410,7 +5410,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psraw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    psraw (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5434,7 +5434,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psraw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    psraw (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5500,7 +5500,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psrld:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    psrld (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5524,7 +5524,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psrld:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    psrld (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5590,7 +5590,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psrlq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    psrlq (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5614,7 +5614,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psrlq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    psrlq (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5680,7 +5680,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psrlw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    psrlw (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5704,7 +5704,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psrlw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    psrlw (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -5770,7 +5770,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psubb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    psubb (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    psubb (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5791,7 +5791,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psubb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    psubb (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    psubb (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5848,7 +5848,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psubd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    psubd (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    psubd (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5869,7 +5869,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psubd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    psubd (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    psubd (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5926,7 +5926,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psubq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    psubq (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    psubq (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5947,7 +5947,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psubq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    psubq (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    psubq (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6004,7 +6004,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86
 ; GENERIC-LABEL: test_psubsb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    psubsb (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    psubsb (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6025,7 +6025,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86
 ; SANDY-LABEL: test_psubsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    psubsb (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    psubsb (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6082,7 +6082,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86
 ; GENERIC-LABEL: test_psubsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    psubsw (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    psubsw (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6103,7 +6103,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86
 ; SANDY-LABEL: test_psubsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    psubsw (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    psubsw (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6160,7 +6160,7 @@ define i64 @test_psubusb(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_psubusb:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    psubusb (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    psubusb (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6181,7 +6181,7 @@ define i64 @test_psubusb(x86_mmx %a0, x8
 ; SANDY-LABEL: test_psubusb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    psubusb (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    psubusb (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6238,7 +6238,7 @@ define i64 @test_psubusw(x86_mmx %a0, x8
 ; GENERIC-LABEL: test_psubusw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    psubusw (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    psubusw (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6259,7 +6259,7 @@ define i64 @test_psubusw(x86_mmx %a0, x8
 ; SANDY-LABEL: test_psubusw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    psubusw (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    psubusw (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6316,7 +6316,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_
 ; GENERIC-LABEL: test_psubw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    psubw (%rdi), %mm0 # sched: [7:1.00]
+; GENERIC-NEXT:    psubw (%rdi), %mm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6337,7 +6337,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_
 ; SANDY-LABEL: test_psubw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    psubw (%rdi), %mm0 # sched: [7:1.00]
+; SANDY-NEXT:    psubw (%rdi), %mm0 # sched: [8:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6394,7 +6394,7 @@ define i64 @test_punpckhbw(x86_mmx %a0,
 ; GENERIC-LABEL: test_punpckhbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; GENERIC-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [5:1.00]
+; GENERIC-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6415,7 +6415,7 @@ define i64 @test_punpckhbw(x86_mmx %a0,
 ; SANDY-LABEL: test_punpckhbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; SANDY-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [5:1.00]
+; SANDY-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6472,7 +6472,7 @@ define i64 @test_punpckhdq(x86_mmx %a0,
 ; GENERIC-LABEL: test_punpckhdq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; GENERIC-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [5:1.00]
+; GENERIC-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6493,7 +6493,7 @@ define i64 @test_punpckhdq(x86_mmx %a0,
 ; SANDY-LABEL: test_punpckhdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; SANDY-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [5:1.00]
+; SANDY-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6550,7 +6550,7 @@ define i64 @test_punpckhwd(x86_mmx %a0,
 ; GENERIC-LABEL: test_punpckhwd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; GENERIC-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00]
+; GENERIC-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6571,7 +6571,7 @@ define i64 @test_punpckhwd(x86_mmx %a0,
 ; SANDY-LABEL: test_punpckhwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SANDY-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00]
+; SANDY-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6628,7 +6628,7 @@ define i64 @test_punpcklbw(x86_mmx %a0,
 ; GENERIC-LABEL: test_punpcklbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; GENERIC-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00]
+; GENERIC-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6649,7 +6649,7 @@ define i64 @test_punpcklbw(x86_mmx %a0,
 ; SANDY-LABEL: test_punpcklbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SANDY-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00]
+; SANDY-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6706,7 +6706,7 @@ define i64 @test_punpckldq(x86_mmx %a0,
 ; GENERIC-LABEL: test_punpckldq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; GENERIC-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [5:1.00]
+; GENERIC-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6727,7 +6727,7 @@ define i64 @test_punpckldq(x86_mmx %a0,
 ; SANDY-LABEL: test_punpckldq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; SANDY-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [5:1.00]
+; SANDY-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6784,7 +6784,7 @@ define i64 @test_punpcklwd(x86_mmx %a0,
 ; GENERIC-LABEL: test_punpcklwd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; GENERIC-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [5:1.00]
+; GENERIC-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6805,7 +6805,7 @@ define i64 @test_punpcklwd(x86_mmx %a0,
 ; SANDY-LABEL: test_punpcklwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; SANDY-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [5:1.00]
+; SANDY-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6862,7 +6862,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_m
 ; GENERIC-LABEL: test_pxor:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pxor %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    pxor (%rdi), %mm0 # sched: [5:1.00]
+; GENERIC-NEXT:    pxor (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6883,7 +6883,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_m
 ; SANDY-LABEL: test_pxor:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pxor %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    pxor (%rdi), %mm0 # sched: [5:1.00]
+; SANDY-NEXT:    pxor (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/movbe-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movbe-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movbe-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movbe-schedule.ll Fri Apr  6 04:00:51 2018
@@ -12,7 +12,7 @@
 define i16 @test_movbe_i16(i16 *%a0, i16 %a1, i16 *%a2) {
 ; GENERIC-LABEL: test_movbe_i16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movbew (%rdi), %ax # sched: [5:0.50]
+; GENERIC-NEXT:    movbew (%rdi), %ax # sched: [6:0.50]
 ; GENERIC-NEXT:    movbew %si, (%rdx) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -72,7 +72,7 @@ declare i16 @llvm.bswap.i16(i16)
 define i32 @test_movbe_i32(i32 *%a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_movbe_i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movbel (%rdi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    movbel (%rdi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    movbel %esi, (%rdx) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -132,7 +132,7 @@ declare i32 @llvm.bswap.i32(i32)
 define i64 @test_movbe_i64(i64 *%a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_movbe_i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movbeq (%rdi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    movbeq (%rdi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    movbeq %rsi, (%rdx) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll Fri Apr  6 04:00:51 2018
@@ -42,7 +42,7 @@ define i8 @test_aaa(i8 %a0) optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    aaa # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_aaa:
 ; HASWELL:       # %bb.0:
@@ -126,7 +126,7 @@ define i8 @test_aad(i16 %a0) optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    aad # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_aad:
 ; HASWELL:       # %bb.0:
@@ -210,7 +210,7 @@ define i16 @test_aam(i8 %a0) optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    aam # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_aam:
 ; HASWELL:       # %bb.0:
@@ -294,7 +294,7 @@ define i8 @test_aas(i8 %a0) optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    aas # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_aas:
 ; HASWELL:       # %bb.0:
@@ -382,7 +382,7 @@ define void @test_arpl(i16 %a0, i16 *%a1
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    arpl %ax, (%ecx) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_arpl:
 ; HASWELL:       # %bb.0:
@@ -504,7 +504,7 @@ define void @test_bound(i16 %a0, i16 *%a
 ; SANDY-NEXT:    bound %ecx, (%edx) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    popl %esi # sched: [6:0.50]
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_bound:
 ; HASWELL:       # %bb.0:
@@ -636,7 +636,7 @@ define i8 @test_daa(i8 %a0) optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    daa # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_daa:
 ; HASWELL:       # %bb.0:
@@ -720,7 +720,7 @@ define i8 @test_das(i8 %a0) optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    das # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_das:
 ; HASWELL:       # %bb.0:
@@ -812,7 +812,7 @@ define void @test_dec16(i16 %a0, i16* %a
 ; SANDY-NEXT:    decw %ax # sched: [1:0.33]
 ; SANDY-NEXT:    decw (%ecx) # sched: [7:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_dec16:
 ; HASWELL:       # %bb.0:
@@ -915,7 +915,7 @@ define void @test_dec32(i32 %a0, i32* %a
 ; SANDY-NEXT:    decl %eax # sched: [1:0.33]
 ; SANDY-NEXT:    decl (%ecx) # sched: [7:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_dec32:
 ; HASWELL:       # %bb.0:
@@ -1019,7 +1019,7 @@ define void @test_inc16(i16 %a0, i16* %a
 ; SANDY-NEXT:    incw %ax # sched: [1:0.33]
 ; SANDY-NEXT:    incw (%ecx) # sched: [7:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_inc16:
 ; HASWELL:       # %bb.0:
@@ -1122,7 +1122,7 @@ define void @test_inc32(i32 %a0, i32* %a
 ; SANDY-NEXT:    incl %eax # sched: [1:0.33]
 ; SANDY-NEXT:    incl (%ecx) # sched: [7:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_inc32:
 ; HASWELL:       # %bb.0:
@@ -1214,7 +1214,7 @@ define void @test_into() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    into # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_into:
 ; HASWELL:       # %bb.0:
@@ -1296,7 +1296,7 @@ define void @test_jcxz_jecxz() optsize {
 ; SANDY-NEXT:    jcxz JXTGT # sched: [2:1.00]
 ; SANDY-NEXT:    jecxz JXTGT # sched: [2:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_jcxz_jecxz:
 ; HASWELL:       # %bb.0:
@@ -1382,7 +1382,7 @@ define void @test_leave() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    leave # sched: [7:0.67]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_leave:
 ; HASWELL:       # %bb.0:
@@ -1496,7 +1496,7 @@ define void @test_pop_push() optsize {
 ; SANDY-NEXT:    pushl %fs # sched: [100:0.33]
 ; SANDY-NEXT:    pushl %gs # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_pop_push:
 ; HASWELL:       # %bb.0:
@@ -1660,7 +1660,7 @@ define i16 @test_pop_push_16(i16 %a0, i1
 ; SANDY-NEXT:    # sched: [1:1.00]
 ; SANDY-NEXT:    pushw $7 # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_pop_push_16:
 ; HASWELL:       # %bb.0:
@@ -1812,7 +1812,7 @@ define i32 @test_pop_push_32(i32 %a0, i3
 ; SANDY-NEXT:    # sched: [1:1.00]
 ; SANDY-NEXT:    pushl $7 # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_pop_push_32:
 ; HASWELL:       # %bb.0:
@@ -1941,12 +1941,12 @@ define void @test_popa_popf_pusha_pushf(
 ; SANDY-LABEL: test_popa_popf_pusha_pushf:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    popal # sched: [4:0.50]
-; SANDY-NEXT:    popfl # sched: [4:0.50]
+; SANDY-NEXT:    popal # sched: [5:0.50]
+; SANDY-NEXT:    popfl # sched: [5:0.50]
 ; SANDY-NEXT:    pushal # sched: [1:1.00]
 ; SANDY-NEXT:    pushfl # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_popa_popf_pusha_pushf:
 ; HASWELL:       # %bb.0:
@@ -2049,14 +2049,14 @@ define void @test_ret() optsize {
 ; SANDY-LABEL: test_ret:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ; SANDY-NEXT:    retl $4095 # imm = 0xFFF
-; SANDY-NEXT:    # sched: [5:1.00]
-; SANDY-NEXT:    lretl # sched: [5:1.00]
+; SANDY-NEXT:    # sched: [6:1.00]
+; SANDY-NEXT:    lretl # sched: [6:1.00]
 ; SANDY-NEXT:    lretl $4095 # imm = 0xFFF
-; SANDY-NEXT:    # sched: [5:1.00]
+; SANDY-NEXT:    # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_ret:
 ; HASWELL:       # %bb.0:
@@ -2160,7 +2160,7 @@ define i8 @test_salc() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    salc # sched: [1:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_salc:
 ; HASWELL:       # %bb.0:
@@ -2252,9 +2252,9 @@ define void @test_xchg_32(i32 %a0, i32 %
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    xchgl %eax, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    xchgl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    xchgl %eax, (%edx) # sched: [5:1.00]
+; SANDY-NEXT:    xchgl %eax, (%edx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_xchg_32:
 ; HASWELL:       # %bb.0:

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll Fri Apr  6 04:00:51 2018
@@ -4638,8 +4638,8 @@ define void @test_cmpxchg8b_cmpxchg16b(i
 ; GENERIC-LABEL: test_cmpxchg8b_cmpxchg16b:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    cmpxchg8b (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT:    cmpxchg16b (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    cmpxchg8b (%rdi) # sched: [6:1.00]
+; GENERIC-NEXT:    cmpxchg16b (%rdi) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4662,8 +4662,8 @@ define void @test_cmpxchg8b_cmpxchg16b(i
 ; SANDY-LABEL: test_cmpxchg8b_cmpxchg16b:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    cmpxchg8b (%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    cmpxchg16b (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    cmpxchg8b (%rdi) # sched: [6:1.00]
+; SANDY-NEXT:    cmpxchg16b (%rdi) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5132,13 +5132,13 @@ define void @test_div(i8 %a0, i16 %a1, i
 ; GENERIC-NEXT:    movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    divb %dil # sched: [25:10.00]
-; GENERIC-NEXT:    divb (%r8) # sched: [29:10.00]
+; GENERIC-NEXT:    divb (%r8) # sched: [30:10.00]
 ; GENERIC-NEXT:    divw %si # sched: [25:10.00]
-; GENERIC-NEXT:    divw (%r9) # sched: [29:10.00]
+; GENERIC-NEXT:    divw (%r9) # sched: [30:10.00]
 ; GENERIC-NEXT:    divl %edx # sched: [25:10.00]
-; GENERIC-NEXT:    divl (%rax) # sched: [29:10.00]
+; GENERIC-NEXT:    divl (%rax) # sched: [30:10.00]
 ; GENERIC-NEXT:    divq %rcx # sched: [25:10.00]
-; GENERIC-NEXT:    divq (%r10) # sched: [29:10.00]
+; GENERIC-NEXT:    divq (%r10) # sched: [30:10.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5180,13 +5180,13 @@ define void @test_div(i8 %a0, i16 %a1, i
 ; SANDY-NEXT:    movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    divb %dil # sched: [25:10.00]
-; SANDY-NEXT:    divb (%r8) # sched: [29:10.00]
+; SANDY-NEXT:    divb (%r8) # sched: [30:10.00]
 ; SANDY-NEXT:    divw %si # sched: [25:10.00]
-; SANDY-NEXT:    divw (%r9) # sched: [29:10.00]
+; SANDY-NEXT:    divw (%r9) # sched: [30:10.00]
 ; SANDY-NEXT:    divl %edx # sched: [25:10.00]
-; SANDY-NEXT:    divl (%rax) # sched: [29:10.00]
+; SANDY-NEXT:    divl (%rax) # sched: [30:10.00]
 ; SANDY-NEXT:    divq %rcx # sched: [25:10.00]
-; SANDY-NEXT:    divq (%r10) # sched: [29:10.00]
+; SANDY-NEXT:    divq (%r10) # sched: [30:10.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5380,13 +5380,13 @@ define void @test_idiv(i8 %a0, i16 %a1,
 ; GENERIC-NEXT:    movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    idivb %dil # sched: [25:10.00]
-; GENERIC-NEXT:    idivb (%r8) # sched: [29:10.00]
+; GENERIC-NEXT:    idivb (%r8) # sched: [30:10.00]
 ; GENERIC-NEXT:    idivw %si # sched: [25:10.00]
-; GENERIC-NEXT:    idivw (%r9) # sched: [29:10.00]
+; GENERIC-NEXT:    idivw (%r9) # sched: [30:10.00]
 ; GENERIC-NEXT:    idivl %edx # sched: [25:10.00]
-; GENERIC-NEXT:    idivl (%rax) # sched: [29:10.00]
+; GENERIC-NEXT:    idivl (%rax) # sched: [30:10.00]
 ; GENERIC-NEXT:    idivq %rcx # sched: [25:10.00]
-; GENERIC-NEXT:    idivq (%r10) # sched: [29:10.00]
+; GENERIC-NEXT:    idivq (%r10) # sched: [30:10.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5428,13 +5428,13 @@ define void @test_idiv(i8 %a0, i16 %a1,
 ; SANDY-NEXT:    movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    idivb %dil # sched: [25:10.00]
-; SANDY-NEXT:    idivb (%r8) # sched: [29:10.00]
+; SANDY-NEXT:    idivb (%r8) # sched: [30:10.00]
 ; SANDY-NEXT:    idivw %si # sched: [25:10.00]
-; SANDY-NEXT:    idivw (%r9) # sched: [29:10.00]
+; SANDY-NEXT:    idivw (%r9) # sched: [30:10.00]
 ; SANDY-NEXT:    idivl %edx # sched: [25:10.00]
-; SANDY-NEXT:    idivl (%rax) # sched: [29:10.00]
+; SANDY-NEXT:    idivl (%rax) # sched: [30:10.00]
 ; SANDY-NEXT:    idivq %rcx # sched: [25:10.00]
-; SANDY-NEXT:    idivq (%r10) # sched: [29:10.00]
+; SANDY-NEXT:    idivq (%r10) # sched: [30:10.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5542,7 +5542,7 @@ define void @test_imul_8(i8 %a0, i8* %a1
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    imulb %dil # sched: [3:1.00]
-; GENERIC-NEXT:    imulb (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    imulb (%rsi) # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5566,7 +5566,7 @@ define void @test_imul_8(i8 %a0, i8* %a1
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    imulb %dil # sched: [3:1.00]
-; SANDY-NEXT:    imulb (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    imulb (%rsi) # sched: [8:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5625,15 +5625,15 @@ define void @test_imul_16(i16 %a0, i16*
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    imulw %di # sched: [3:1.00]
-; GENERIC-NEXT:    imulw (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    imulw (%rsi) # sched: [8:1.00]
 ; GENERIC-NEXT:    imulw %di, %di # sched: [3:1.00]
-; GENERIC-NEXT:    imulw (%rsi), %di # sched: [7:1.00]
+; GENERIC-NEXT:    imulw (%rsi), %di # sched: [8:1.00]
 ; GENERIC-NEXT:    imulw $511, %di, %di # imm = 0x1FF
 ; GENERIC-NEXT:    # sched: [3:1.00]
 ; GENERIC-NEXT:    imulw $511, (%rsi), %di # imm = 0x1FF
-; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    # sched: [8:1.00]
 ; GENERIC-NEXT:    imulw $7, %di, %di # sched: [3:1.00]
-; GENERIC-NEXT:    imulw $7, (%rsi), %di # sched: [7:1.00]
+; GENERIC-NEXT:    imulw $7, (%rsi), %di # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5673,15 +5673,15 @@ define void @test_imul_16(i16 %a0, i16*
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    imulw %di # sched: [3:1.00]
-; SANDY-NEXT:    imulw (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    imulw (%rsi) # sched: [8:1.00]
 ; SANDY-NEXT:    imulw %di, %di # sched: [3:1.00]
-; SANDY-NEXT:    imulw (%rsi), %di # sched: [7:1.00]
+; SANDY-NEXT:    imulw (%rsi), %di # sched: [8:1.00]
 ; SANDY-NEXT:    imulw $511, %di, %di # imm = 0x1FF
 ; SANDY-NEXT:    # sched: [3:1.00]
 ; SANDY-NEXT:    imulw $511, (%rsi), %di # imm = 0x1FF
-; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    # sched: [8:1.00]
 ; SANDY-NEXT:    imulw $7, %di, %di # sched: [3:1.00]
-; SANDY-NEXT:    imulw $7, (%rsi), %di # sched: [7:1.00]
+; SANDY-NEXT:    imulw $7, (%rsi), %di # sched: [8:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5788,15 +5788,15 @@ define void @test_imul_32(i32 %a0, i32*
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    imull %edi # sched: [3:1.00]
-; GENERIC-NEXT:    imull (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    imull (%rsi) # sched: [8:1.00]
 ; GENERIC-NEXT:    imull %edi, %edi # sched: [3:1.00]
-; GENERIC-NEXT:    imull (%rsi), %edi # sched: [7:1.00]
+; GENERIC-NEXT:    imull (%rsi), %edi # sched: [8:1.00]
 ; GENERIC-NEXT:    imull $665536, %edi, %edi # imm = 0xA27C0
 ; GENERIC-NEXT:    # sched: [3:1.00]
 ; GENERIC-NEXT:    imull $665536, (%rsi), %edi # imm = 0xA27C0
-; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    # sched: [8:1.00]
 ; GENERIC-NEXT:    imull $7, %edi, %edi # sched: [3:1.00]
-; GENERIC-NEXT:    imull $7, (%rsi), %edi # sched: [7:1.00]
+; GENERIC-NEXT:    imull $7, (%rsi), %edi # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5836,15 +5836,15 @@ define void @test_imul_32(i32 %a0, i32*
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    imull %edi # sched: [3:1.00]
-; SANDY-NEXT:    imull (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    imull (%rsi) # sched: [8:1.00]
 ; SANDY-NEXT:    imull %edi, %edi # sched: [3:1.00]
-; SANDY-NEXT:    imull (%rsi), %edi # sched: [7:1.00]
+; SANDY-NEXT:    imull (%rsi), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    imull $665536, %edi, %edi # imm = 0xA27C0
 ; SANDY-NEXT:    # sched: [3:1.00]
 ; SANDY-NEXT:    imull $665536, (%rsi), %edi # imm = 0xA27C0
-; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    # sched: [8:1.00]
 ; SANDY-NEXT:    imull $7, %edi, %edi # sched: [3:1.00]
-; SANDY-NEXT:    imull $7, (%rsi), %edi # sched: [7:1.00]
+; SANDY-NEXT:    imull $7, (%rsi), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5951,15 +5951,15 @@ define void @test_imul_64(i64 %a0, i64*
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    imulq %rdi # sched: [3:1.00]
-; GENERIC-NEXT:    imulq (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    imulq (%rsi) # sched: [8:1.00]
 ; GENERIC-NEXT:    imulq %rdi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT:    imulq (%rsi), %rdi # sched: [7:1.00]
+; GENERIC-NEXT:    imulq (%rsi), %rdi # sched: [8:1.00]
 ; GENERIC-NEXT:    imulq $665536, %rdi, %rdi # imm = 0xA27C0
 ; GENERIC-NEXT:    # sched: [3:1.00]
 ; GENERIC-NEXT:    imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    # sched: [8:1.00]
 ; GENERIC-NEXT:    imulq $7, %rdi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT:    imulq $7, (%rsi), %rdi # sched: [7:1.00]
+; GENERIC-NEXT:    imulq $7, (%rsi), %rdi # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5999,15 +5999,15 @@ define void @test_imul_64(i64 %a0, i64*
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    imulq %rdi # sched: [3:1.00]
-; SANDY-NEXT:    imulq (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    imulq (%rsi) # sched: [8:1.00]
 ; SANDY-NEXT:    imulq %rdi, %rdi # sched: [3:1.00]
-; SANDY-NEXT:    imulq (%rsi), %rdi # sched: [7:1.00]
+; SANDY-NEXT:    imulq (%rsi), %rdi # sched: [8:1.00]
 ; SANDY-NEXT:    imulq $665536, %rdi, %rdi # imm = 0xA27C0
 ; SANDY-NEXT:    # sched: [3:1.00]
 ; SANDY-NEXT:    imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    # sched: [8:1.00]
 ; SANDY-NEXT:    imulq $7, %rdi, %rdi # sched: [3:1.00]
-; SANDY-NEXT:    imulq $7, (%rsi), %rdi # sched: [7:1.00]
+; SANDY-NEXT:    imulq $7, (%rsi), %rdi # sched: [8:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9888,7 +9888,7 @@ define void @test_popf_pushf() optsize {
 ; GENERIC-LABEL: test_popf_pushf:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    popfq # sched: [4:0.50]
+; GENERIC-NEXT:    popfq # sched: [5:0.50]
 ; GENERIC-NEXT:    pushfq # sched: [5:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9912,7 +9912,7 @@ define void @test_popf_pushf() optsize {
 ; SANDY-LABEL: test_popf_pushf:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    popfq # sched: [4:0.50]
+; SANDY-NEXT:    popfq # sched: [5:0.50]
 ; SANDY-NEXT:    pushfq # sched: [5:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -9974,16 +9974,16 @@ define void @test_rcl_rcr_8(i8 %a0, i8 %
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    rclb %dil # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrb %dil # sched: [1:0.50]
-; GENERIC-NEXT:    rclb (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrb (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclb (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrb (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    rclb $7, %dil # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrb $7, %dil # sched: [1:0.50]
-; GENERIC-NEXT:    rclb $7, (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrb $7, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclb $7, (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrb $7, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    rclb %cl, %dil # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrb %cl, %dil # sched: [1:0.50]
-; GENERIC-NEXT:    rclb %cl, (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrb %cl, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclb %cl, (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrb %cl, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10028,16 +10028,16 @@ define void @test_rcl_rcr_8(i8 %a0, i8 %
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    rclb %dil # sched: [1:0.50]
 ; SANDY-NEXT:    rcrb %dil # sched: [1:0.50]
-; SANDY-NEXT:    rclb (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrb (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclb (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrb (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    rclb $7, %dil # sched: [1:0.50]
 ; SANDY-NEXT:    rcrb $7, %dil # sched: [1:0.50]
-; SANDY-NEXT:    rclb $7, (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrb $7, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclb $7, (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrb $7, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    rclb %cl, %dil # sched: [1:0.50]
 ; SANDY-NEXT:    rcrb %cl, %dil # sched: [1:0.50]
-; SANDY-NEXT:    rclb %cl, (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrb %cl, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclb %cl, (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrb %cl, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10157,16 +10157,16 @@ define void @test_rcl_rcr_16(i16 %a0, i1
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    rclw %di # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrw %di # sched: [1:0.50]
-; GENERIC-NEXT:    rclw (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrw (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclw (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrw (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    rclw $7, %di # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrw $7, %di # sched: [1:0.50]
-; GENERIC-NEXT:    rclw $7, (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrw $7, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclw $7, (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrw $7, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    rclw %cl, %di # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrw %cl, %di # sched: [1:0.50]
-; GENERIC-NEXT:    rclw %cl, (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrw %cl, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclw %cl, (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrw %cl, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10211,16 +10211,16 @@ define void @test_rcl_rcr_16(i16 %a0, i1
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    rclw %di # sched: [1:0.50]
 ; SANDY-NEXT:    rcrw %di # sched: [1:0.50]
-; SANDY-NEXT:    rclw (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrw (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclw (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrw (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    rclw $7, %di # sched: [1:0.50]
 ; SANDY-NEXT:    rcrw $7, %di # sched: [1:0.50]
-; SANDY-NEXT:    rclw $7, (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrw $7, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclw $7, (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrw $7, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    rclw %cl, %di # sched: [1:0.50]
 ; SANDY-NEXT:    rcrw %cl, %di # sched: [1:0.50]
-; SANDY-NEXT:    rclw %cl, (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrw %cl, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclw %cl, (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrw %cl, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10340,16 +10340,16 @@ define void @test_rcl_rcr_32(i32 %a0, i3
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    rcll %edi # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrl %edi # sched: [1:0.50]
-; GENERIC-NEXT:    rcll (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrl (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rcll (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrl (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    rcll $7, %edi # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrl $7, %edi # sched: [1:0.50]
-; GENERIC-NEXT:    rcll $7, (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrl $7, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rcll $7, (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrl $7, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    rcll %cl, %edi # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrl %cl, %edi # sched: [1:0.50]
-; GENERIC-NEXT:    rcll %cl, (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrl %cl, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rcll %cl, (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrl %cl, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10394,16 +10394,16 @@ define void @test_rcl_rcr_32(i32 %a0, i3
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    rcll %edi # sched: [1:0.50]
 ; SANDY-NEXT:    rcrl %edi # sched: [1:0.50]
-; SANDY-NEXT:    rcll (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrl (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rcll (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrl (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    rcll $7, %edi # sched: [1:0.50]
 ; SANDY-NEXT:    rcrl $7, %edi # sched: [1:0.50]
-; SANDY-NEXT:    rcll $7, (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrl $7, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rcll $7, (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrl $7, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    rcll %cl, %edi # sched: [1:0.50]
 ; SANDY-NEXT:    rcrl %cl, %edi # sched: [1:0.50]
-; SANDY-NEXT:    rcll %cl, (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrl %cl, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rcll %cl, (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrl %cl, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10523,16 +10523,16 @@ define void @test_rcl_rcr_64(i64 %a0, i6
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    rclq %rdi # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrq %rdi # sched: [1:0.50]
-; GENERIC-NEXT:    rclq (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrq (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclq (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrq (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    rclq $7, %rdi # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrq $7, %rdi # sched: [1:0.50]
-; GENERIC-NEXT:    rclq $7, (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrq $7, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclq $7, (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrq $7, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    rclq %cl, %rdi # sched: [1:0.50]
 ; GENERIC-NEXT:    rcrq %cl, %rdi # sched: [1:0.50]
-; GENERIC-NEXT:    rclq %cl, (%rdx) # sched: [5:1.00]
-; GENERIC-NEXT:    rcrq %cl, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    rclq %cl, (%rdx) # sched: [6:1.00]
+; GENERIC-NEXT:    rcrq %cl, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10577,16 +10577,16 @@ define void @test_rcl_rcr_64(i64 %a0, i6
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    rclq %rdi # sched: [1:0.50]
 ; SANDY-NEXT:    rcrq %rdi # sched: [1:0.50]
-; SANDY-NEXT:    rclq (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrq (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclq (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrq (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    rclq $7, %rdi # sched: [1:0.50]
 ; SANDY-NEXT:    rcrq $7, %rdi # sched: [1:0.50]
-; SANDY-NEXT:    rclq $7, (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrq $7, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclq $7, (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrq $7, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    rclq %cl, %rdi # sched: [1:0.50]
 ; SANDY-NEXT:    rcrq %cl, %rdi # sched: [1:0.50]
-; SANDY-NEXT:    rclq %cl, (%rdx) # sched: [5:1.00]
-; SANDY-NEXT:    rcrq %cl, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    rclq %cl, (%rdx) # sched: [6:1.00]
+; SANDY-NEXT:    rcrq %cl, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10949,10 +10949,10 @@ define void @test_ret() optsize {
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:    retq $4095 # imm = 0xFFF
-; GENERIC-NEXT:    # sched: [5:1.00]
-; GENERIC-NEXT:    lretl # sched: [5:1.00]
+; GENERIC-NEXT:    # sched: [6:1.00]
+; GENERIC-NEXT:    lretl # sched: [6:1.00]
 ; GENERIC-NEXT:    lretl $4095 # imm = 0xFFF
-; GENERIC-NEXT:    # sched: [5:1.00]
+; GENERIC-NEXT:    # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10985,10 +10985,10 @@ define void @test_ret() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ; SANDY-NEXT:    retq $4095 # imm = 0xFFF
-; SANDY-NEXT:    # sched: [5:1.00]
-; SANDY-NEXT:    lretl # sched: [5:1.00]
+; SANDY-NEXT:    # sched: [6:1.00]
+; SANDY-NEXT:    lretl # sched: [6:1.00]
 ; SANDY-NEXT:    lretl $4095 # imm = 0xFFF
-; SANDY-NEXT:    # sched: [5:1.00]
+; SANDY-NEXT:    # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16121,7 +16121,7 @@ define void @test_xchg_8(i8 %a0, i8 %a1,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    xchgb %sil, %dil # sched: [1:0.33]
-; GENERIC-NEXT:    xchgb %dil, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    xchgb %dil, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16145,7 +16145,7 @@ define void @test_xchg_8(i8 %a0, i8 %a1,
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    xchgb %sil, %dil # sched: [1:0.33]
-; SANDY-NEXT:    xchgb %dil, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    xchgb %dil, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16205,7 +16205,7 @@ define void @test_xchg_16(i16 %a0, i16 %
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    xchgw %di, %ax # sched: [1:0.33]
 ; GENERIC-NEXT:    xchgw %si, %di # sched: [1:0.33]
-; GENERIC-NEXT:    xchgw %di, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    xchgw %di, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16232,7 +16232,7 @@ define void @test_xchg_16(i16 %a0, i16 %
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    xchgw %di, %ax # sched: [1:0.33]
 ; SANDY-NEXT:    xchgw %si, %di # sched: [1:0.33]
-; SANDY-NEXT:    xchgw %di, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    xchgw %di, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16298,7 +16298,7 @@ define void @test_xchg_32(i32 %a0, i32 %
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    xchgl %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    xchgl %esi, %edi # sched: [1:0.33]
-; GENERIC-NEXT:    xchgl %edi, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    xchgl %edi, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16325,7 +16325,7 @@ define void @test_xchg_32(i32 %a0, i32 %
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    xchgl %edi, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    xchgl %esi, %edi # sched: [1:0.33]
-; SANDY-NEXT:    xchgl %edi, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    xchgl %edi, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16391,7 +16391,7 @@ define void @test_xchg_64(i64 %a0, i64 %
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    xchgq %rdi, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    xchgq %rsi, %rdi # sched: [1:0.33]
-; GENERIC-NEXT:    xchgq %rdi, (%rdx) # sched: [5:1.00]
+; GENERIC-NEXT:    xchgq %rdi, (%rdx) # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16418,7 +16418,7 @@ define void @test_xchg_64(i64 %a0, i64 %
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    xchgq %rdi, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    xchgq %rsi, %rdi # sched: [1:0.33]
-; SANDY-NEXT:    xchgq %rdi, (%rdx) # sched: [5:1.00]
+; SANDY-NEXT:    xchgq %rdi, (%rdx) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16483,7 +16483,7 @@ define void @test_xlat() optsize {
 ; GENERIC-LABEL: test_xlat:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    xlatb # sched: [4:0.50]
+; GENERIC-NEXT:    xlatb # sched: [5:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16504,7 +16504,7 @@ define void @test_xlat() optsize {
 ; SANDY-LABEL: test_xlat:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    xlatb # sched: [4:0.50]
+; SANDY-NEXT:    xlatb # sched: [5:0.50]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/sha-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sha-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sha-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sha-schedule.ll Fri Apr  6 04:00:51 2018
@@ -12,7 +12,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32
 ; GENERIC-LABEL: test_sha1msg1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha1msg1:
@@ -43,7 +43,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32
 ; GENERIC-LABEL: test_sha1msg2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha1msg2:
@@ -74,7 +74,7 @@ define <4 x i32> @test_sha1nexte(<4 x i3
 ; GENERIC-LABEL: test_sha1nexte:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha1nexte:
@@ -105,7 +105,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i3
 ; GENERIC-LABEL: test_sha1rnds4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha1rnds4:
@@ -140,7 +140,7 @@ define <4 x i32> @test_sha256msg1(<4 x i
 ; GENERIC-LABEL: test_sha256msg1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha256msg1:
@@ -171,7 +171,7 @@ define <4 x i32> @test_sha256msg2(<4 x i
 ; GENERIC-LABEL: test_sha256msg2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha256msg2:
@@ -204,7 +204,7 @@ define <4 x i32> @test_sha256rnds2(<4 x
 ; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
 ; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
-; GENERIC-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [9:1.00]
+; GENERIC-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00]
 ; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Fri Apr  6 04:00:51 2018
@@ -623,7 +623,7 @@ define <4 x float> @test_dpps(<4 x float
 ; GENERIC-LABEL: test_dpps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
-; GENERIC-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_dpps:
@@ -635,13 +635,13 @@ define <4 x float> @test_dpps(<4 x float
 ; SANDY-SSE-LABEL: test_dpps:
 ; SANDY-SSE:       # %bb.0:
 ; SANDY-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
-; SANDY-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_dpps:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
-; SANDY-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-SSE-LABEL: test_dpps:
@@ -1903,7 +1903,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i
 ; GENERIC-LABEL: test_pextrw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pextrw:
@@ -1915,7 +1915,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i
 ; SANDY-SSE-LABEL: test_pextrw:
 ; SANDY-SSE:       # %bb.0:
 ; SANDY-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_pextrw:

Modified: llvm/trunk/test/CodeGen/X86/tbm-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tbm-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tbm-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tbm-schedule.ll Fri Apr  6 04:00:51 2018
@@ -10,7 +10,7 @@ define i32 @test_x86_tbm_bextri_u32(i32
 ; GENERIC-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
 ; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
-; GENERIC-NEXT:    # sched: [5:0.50]
+; GENERIC-NEXT:    # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -35,7 +35,7 @@ define i64 @test_x86_tbm_bextri_u64(i64
 ; GENERIC-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
 ; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
-; GENERIC-NEXT:    # sched: [5:0.50]
+; GENERIC-NEXT:    # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -58,7 +58,7 @@ define i32 @test_x86_tbm_blcfill_u32(i32
 ; GENERIC-LABEL: test_x86_tbm_blcfill_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcfilll %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    blcfilll (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    blcfilll (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -81,7 +81,7 @@ define i64 @test_x86_tbm_blcfill_u64(i64
 ; GENERIC-LABEL: test_x86_tbm_blcfill_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcfillq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    blcfillq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    blcfillq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -104,7 +104,7 @@ define i32 @test_x86_tbm_blci_u32(i32 %a
 ; GENERIC-LABEL: test_x86_tbm_blci_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcil %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    blcil (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    blcil (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -129,7 +129,7 @@ define i64 @test_x86_tbm_blci_u64(i64 %a
 ; GENERIC-LABEL: test_x86_tbm_blci_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blciq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    blciq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    blciq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -154,7 +154,7 @@ define i32 @test_x86_tbm_blcic_u32(i32 %
 ; GENERIC-LABEL: test_x86_tbm_blcic_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcicl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    blcicl (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    blcicl (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -179,7 +179,7 @@ define i64 @test_x86_tbm_blcic_u64(i64 %
 ; GENERIC-LABEL: test_x86_tbm_blcic_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcicq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    blcicq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    blcicq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -204,7 +204,7 @@ define i32 @test_x86_tbm_blcmsk_u32(i32
 ; GENERIC-LABEL: test_x86_tbm_blcmsk_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcmskl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    blcmskl (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    blcmskl (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -227,7 +227,7 @@ define i64 @test_x86_tbm_blcmsk_u64(i64
 ; GENERIC-LABEL: test_x86_tbm_blcmsk_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcmskq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    blcmskq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    blcmskq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -250,7 +250,7 @@ define i32 @test_x86_tbm_blcs_u32(i32 %a
 ; GENERIC-LABEL: test_x86_tbm_blcs_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcsl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    blcsl (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    blcsl (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -273,7 +273,7 @@ define i64 @test_x86_tbm_blcs_u64(i64 %a
 ; GENERIC-LABEL: test_x86_tbm_blcs_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blcsq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    blcsq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    blcsq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -296,7 +296,7 @@ define i32 @test_x86_tbm_blsfill_u32(i32
 ; GENERIC-LABEL: test_x86_tbm_blsfill_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blsfilll %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    blsfilll (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    blsfilll (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -319,7 +319,7 @@ define i64 @test_x86_tbm_blsfill_u64(i64
 ; GENERIC-LABEL: test_x86_tbm_blsfill_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blsfillq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    blsfillq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    blsfillq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -342,7 +342,7 @@ define i32 @test_x86_tbm_blsic_u32(i32 %
 ; GENERIC-LABEL: test_x86_tbm_blsic_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blsicl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    blsicl (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    blsicl (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -367,7 +367,7 @@ define i64 @test_x86_tbm_blsic_u64(i64 %
 ; GENERIC-LABEL: test_x86_tbm_blsic_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    blsicq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    blsicq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    blsicq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -392,7 +392,7 @@ define i32 @test_x86_tbm_t1mskc_u32(i32
 ; GENERIC-LABEL: test_x86_tbm_t1mskc_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    t1mskcl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    t1mskcl (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    t1mskcl (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -417,7 +417,7 @@ define i64 @test_x86_tbm_t1mskc_u64(i64
 ; GENERIC-LABEL: test_x86_tbm_t1mskc_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    t1mskcq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    t1mskcq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    t1mskcq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -442,7 +442,7 @@ define i32 @test_x86_tbm_tzmsk_u32(i32 %
 ; GENERIC-LABEL: test_x86_tbm_tzmsk_u32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    tzmskl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    tzmskl (%rsi), %eax # sched: [5:0.50]
+; GENERIC-NEXT:    tzmskl (%rsi), %eax # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -467,7 +467,7 @@ define i64 @test_x86_tbm_tzmsk_u64(i64 %
 ; GENERIC-LABEL: test_x86_tbm_tzmsk_u64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    tzmskq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    tzmskq (%rsi), %rax # sched: [5:0.50]
+; GENERIC-NEXT:    tzmskq (%rsi), %rax # sched: [6:0.50]
 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/x87-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x87-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x87-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x87-schedule.ll Fri Apr  6 04:00:51 2018
@@ -38,7 +38,7 @@ define void @test_f2xm1() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    f2xm1 # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_f2xm1:
 ; HASWELL:       # %bb.0:
@@ -112,7 +112,7 @@ define void @test_fabs() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fabs # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fabs:
 ; HASWELL:       # %bb.0:
@@ -206,7 +206,7 @@ define void @test_fadd(float *%a0, doubl
 ; SANDY-NEXT:    fadds (%ecx) # sched: [10:1.00]
 ; SANDY-NEXT:    faddl (%eax) # sched: [10:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fadd:
 ; HASWELL:       # %bb.0:
@@ -330,7 +330,7 @@ define void @test_faddp_fiadd(i16 *%a0,
 ; SANDY-NEXT:    fiadds (%ecx) # sched: [13:2.00]
 ; SANDY-NEXT:    fiaddl (%eax) # sched: [13:2.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_faddp_fiadd:
 ; HASWELL:       # %bb.0:
@@ -442,7 +442,7 @@ define void @test_fbld_fbstp(i8* %a0) op
 ; SANDY-NEXT:    fbld (%eax) # sched: [100:0.33]
 ; SANDY-NEXT:    fbstp (%eax) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fbld_fbstp:
 ; HASWELL:       # %bb.0:
@@ -528,7 +528,7 @@ define void @test_fchs() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fchs # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fchs:
 ; HASWELL:       # %bb.0:
@@ -606,7 +606,7 @@ define void @test_fclex() optsize {
 ; SANDY-NEXT:    wait # sched: [100:0.33]
 ; SANDY-NEXT:    fnclex # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fclex:
 ; HASWELL:       # %bb.0:
@@ -686,7 +686,7 @@ define void @test_fnclex() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fnclex # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fnclex:
 ; HASWELL:       # %bb.0:
@@ -788,7 +788,7 @@ define void @test_fcmov() optsize {
 ; SANDY-NEXT:    fcmovnu %st(1), %st(0) # sched: [3:2.00]
 ; SANDY-NEXT:    fcmovu %st(1), %st(0) # sched: [3:2.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fcmov:
 ; HASWELL:       # %bb.0:
@@ -924,7 +924,7 @@ define void @test_fcom(float *%a0, doubl
 ; SANDY-NEXT:    fcoms (%ecx) # sched: [8:1.00]
 ; SANDY-NEXT:    fcoml (%eax) # sched: [8:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fcom:
 ; HASWELL:       # %bb.0:
@@ -1052,7 +1052,7 @@ define void @test_fcomp_fcompp(float *%a
 ; SANDY-NEXT:    fcompl (%eax) # sched: [8:1.00]
 ; SANDY-NEXT:    fcompp # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fcomp_fcompp:
 ; HASWELL:       # %bb.0:
@@ -1166,7 +1166,7 @@ define void @test_fcomi_fcomip() optsize
 ; SANDY-NEXT:    fcomi %st(3) # sched: [3:1.00]
 ; SANDY-NEXT:    fcompi %st(3) # sched: [3:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fcomi_fcomip:
 ; HASWELL:       # %bb.0:
@@ -1246,7 +1246,7 @@ define void @test_fcos() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fcos # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fcos:
 ; HASWELL:       # %bb.0:
@@ -1320,7 +1320,7 @@ define void @test_fdecstp() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fdecstp # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fdecstp:
 ; HASWELL:       # %bb.0:
@@ -1414,7 +1414,7 @@ define void @test_fdiv(float *%a0, doubl
 ; SANDY-NEXT:    fdivs (%ecx) # sched: [31:1.00]
 ; SANDY-NEXT:    fdivl (%eax) # sched: [31:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fdiv:
 ; HASWELL:       # %bb.0:
@@ -1538,7 +1538,7 @@ define void @test_fdivp_fidiv(i16 *%a0,
 ; SANDY-NEXT:    fidivs (%ecx) # sched: [34:1.00]
 ; SANDY-NEXT:    fidivl (%eax) # sched: [34:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fdivp_fidiv:
 ; HASWELL:       # %bb.0:
@@ -1662,7 +1662,7 @@ define void @test_fdivr(float *%a0, doub
 ; SANDY-NEXT:    fdivrs (%ecx) # sched: [31:1.00]
 ; SANDY-NEXT:    fdivrl (%eax) # sched: [31:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fdivr:
 ; HASWELL:       # %bb.0:
@@ -1786,7 +1786,7 @@ define void @test_fdivrp_fidivr(i16 *%a0
 ; SANDY-NEXT:    fidivrs (%ecx) # sched: [34:1.00]
 ; SANDY-NEXT:    fidivrl (%eax) # sched: [34:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fdivrp_fidivr:
 ; HASWELL:       # %bb.0:
@@ -1890,7 +1890,7 @@ define void @test_ffree() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    ffree %st(0) # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_ffree:
 ; HASWELL:       # %bb.0:
@@ -1984,7 +1984,7 @@ define void @test_ficom(i16 *%a0, i32 *%
 ; SANDY-NEXT:    ficomps (%ecx) # sched: [11:2.00]
 ; SANDY-NEXT:    ficompl (%eax) # sched: [11:2.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_ficom:
 ; HASWELL:       # %bb.0:
@@ -2108,7 +2108,7 @@ define void @test_fild(i16 *%a0, i32 *%a
 ; SANDY-NEXT:    fildl (%ecx) # sched: [10:1.00]
 ; SANDY-NEXT:    fildll (%eax) # sched: [10:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fild:
 ; HASWELL:       # %bb.0:
@@ -2212,7 +2212,7 @@ define void @test_fincstp() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fincstp # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fincstp:
 ; HASWELL:       # %bb.0:
@@ -2290,7 +2290,7 @@ define void @test_finit() optsize {
 ; SANDY-NEXT:    wait # sched: [100:0.33]
 ; SANDY-NEXT:    fninit # sched: [5:1.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_finit:
 ; HASWELL:       # %bb.0:
@@ -2370,7 +2370,7 @@ define void @test_fninit() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fninit # sched: [5:1.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fninit:
 ; HASWELL:       # %bb.0:
@@ -2484,7 +2484,7 @@ define void @test_fist_fistp_fisttp(i16*
 ; SANDY-NEXT:    fisttpl (%ecx) # sched: [5:1.00]
 ; SANDY-NEXT:    fisttpll (%eax) # sched: [5:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fist_fistp_fisttp:
 ; HASWELL:       # %bb.0:
@@ -2642,7 +2642,7 @@ define void @test_fld(i16* %a0, i32* %a1
 ; SANDY-NEXT:    fldl (%ecx) # sched: [9:1.00]
 ; SANDY-NEXT:    fldt (%eax) # sched: [9:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fld:
 ; HASWELL:       # %bb.0:
@@ -2760,7 +2760,7 @@ define void @test_fldcw_fldenv(i8* %a0)
 ; SANDY-NEXT:    fldcw (%eax) # sched: [8:2.00]
 ; SANDY-NEXT:    fldenv (%eax) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fldcw_fldenv:
 ; HASWELL:       # %bb.0:
@@ -2866,7 +2866,7 @@ define void @test_fld1_fldl2e_fldl2t_fld
 ; SANDY-NEXT:    fldpi # sched: [100:0.33]
 ; SANDY-NEXT:    fldz # sched: [1:?]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
 ; HASWELL:       # %bb.0:
@@ -2990,7 +2990,7 @@ define void @test_fmul(float *%a0, doubl
 ; SANDY-NEXT:    fmuls (%ecx) # sched: [12:1.00]
 ; SANDY-NEXT:    fmull (%eax) # sched: [12:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fmul:
 ; HASWELL:       # %bb.0:
@@ -3114,7 +3114,7 @@ define void @test_fmulp_fimul(i16 *%a0,
 ; SANDY-NEXT:    fimuls (%ecx) # sched: [15:1.00]
 ; SANDY-NEXT:    fimull (%eax) # sched: [15:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fmulp_fimul:
 ; HASWELL:       # %bb.0:
@@ -3218,7 +3218,7 @@ define void @test_fnop() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fnop # sched: [1:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fnop:
 ; HASWELL:       # %bb.0:
@@ -3292,7 +3292,7 @@ define void @test_fpatan() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fpatan # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fpatan:
 ; HASWELL:       # %bb.0:
@@ -3370,7 +3370,7 @@ define void @test_fprem_fprem1() optsize
 ; SANDY-NEXT:    fprem # sched: [100:0.33]
 ; SANDY-NEXT:    fprem1 # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fprem_fprem1:
 ; HASWELL:       # %bb.0:
@@ -3450,7 +3450,7 @@ define void @test_fptan() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fptan # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fptan:
 ; HASWELL:       # %bb.0:
@@ -3524,7 +3524,7 @@ define void @test_frndint() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    frndint # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_frndint:
 ; HASWELL:       # %bb.0:
@@ -3602,7 +3602,7 @@ define void @test_frstor(i8* %a0) optsiz
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    frstor (%eax) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_frstor:
 ; HASWELL:       # %bb.0:
@@ -3690,7 +3690,7 @@ define void @test_fsave(i8* %a0) optsize
 ; SANDY-NEXT:    wait # sched: [100:0.33]
 ; SANDY-NEXT:    fnsave (%eax) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fsave:
 ; HASWELL:       # %bb.0:
@@ -3780,7 +3780,7 @@ define void @test_fnsave(i8* %a0) optsiz
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fnsave (%eax) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fnsave:
 ; HASWELL:       # %bb.0:
@@ -3860,7 +3860,7 @@ define void @test_fscale() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fscale # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fscale:
 ; HASWELL:       # %bb.0:
@@ -3934,7 +3934,7 @@ define void @test_fsin() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fsin # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fsin:
 ; HASWELL:       # %bb.0:
@@ -4008,7 +4008,7 @@ define void @test_fsincos() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fsincos # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fsincos:
 ; HASWELL:       # %bb.0:
@@ -4082,7 +4082,7 @@ define void @test_fsqrt() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fsqrt # sched: [14:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fsqrt:
 ; HASWELL:       # %bb.0:
@@ -4192,7 +4192,7 @@ define void @test_fst_fstp(i16* %a0, i32
 ; SANDY-NEXT:    fstpl (%ecx) # sched: [6:1.00]
 ; SANDY-NEXT:    fstpt (%eax) # sched: [6:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fst_fstp:
 ; HASWELL:       # %bb.0:
@@ -4344,7 +4344,7 @@ define void @test_fstcw_fstenv_fstsw(i8*
 ; SANDY-NEXT:    wait # sched: [100:0.33]
 ; SANDY-NEXT:    fnstsw (%eax) # sched: [7:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fstcw_fstenv_fstsw:
 ; HASWELL:       # %bb.0:
@@ -4466,7 +4466,7 @@ define void @test_fnstcw_fnstenv_fnstsw(
 ; SANDY-NEXT:    fnstenv (%eax) # sched: [100:0.33]
 ; SANDY-NEXT:    fnstsw (%eax) # sched: [7:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fnstcw_fnstenv_fnstsw:
 ; HASWELL:       # %bb.0:
@@ -4578,7 +4578,7 @@ define void @test_fsub(float *%a0, doubl
 ; SANDY-NEXT:    fsubs (%ecx) # sched: [10:1.00]
 ; SANDY-NEXT:    fsubl (%eax) # sched: [10:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fsub:
 ; HASWELL:       # %bb.0:
@@ -4702,7 +4702,7 @@ define void @test_fsubp_fisub(i16 *%a0,
 ; SANDY-NEXT:    fisubs (%ecx) # sched: [13:2.00]
 ; SANDY-NEXT:    fisubl (%eax) # sched: [13:2.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fsubp_fisub:
 ; HASWELL:       # %bb.0:
@@ -4826,7 +4826,7 @@ define void @test_fsubr(float *%a0, doub
 ; SANDY-NEXT:    fsubrs (%ecx) # sched: [10:1.00]
 ; SANDY-NEXT:    fsubrl (%eax) # sched: [10:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fsubr:
 ; HASWELL:       # %bb.0:
@@ -4950,7 +4950,7 @@ define void @test_fsubrp_fisubr(i16 *%a0
 ; SANDY-NEXT:    fisubrs (%ecx) # sched: [13:2.00]
 ; SANDY-NEXT:    fisubrl (%eax) # sched: [13:2.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fsubrp_fisubr:
 ; HASWELL:       # %bb.0:
@@ -5054,7 +5054,7 @@ define void @test_ftst() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    ftst # sched: [3:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_ftst:
 ; HASWELL:       # %bb.0:
@@ -5144,7 +5144,7 @@ define void @test_fucom_fucomp_fucompp()
 ; SANDY-NEXT:    fucomp %st(3) # sched: [1:1.00]
 ; SANDY-NEXT:    fucompp # sched: [3:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fucom_fucomp_fucompp:
 ; HASWELL:       # %bb.0:
@@ -5246,7 +5246,7 @@ define void @test_fucomi_fucomip() optsi
 ; SANDY-NEXT:    fucomi %st(3) # sched: [3:1.00]
 ; SANDY-NEXT:    fucompi %st(3) # sched: [3:1.00]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fucomi_fucomip:
 ; HASWELL:       # %bb.0:
@@ -5326,7 +5326,7 @@ define void @test_fwait() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    wait # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fwait:
 ; HASWELL:       # %bb.0:
@@ -5400,7 +5400,7 @@ define void @test_fxam() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fxam # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fxam:
 ; HASWELL:       # %bb.0:
@@ -5478,7 +5478,7 @@ define void @test_fxch() optsize {
 ; SANDY-NEXT:    fxch %st(1) # sched: [1:0.33]
 ; SANDY-NEXT:    fxch %st(3) # sched: [1:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fxch:
 ; HASWELL:       # %bb.0:
@@ -5566,7 +5566,7 @@ define void @test_fxrstor_fxsave(i8* %a0
 ; SANDY-NEXT:    fxrstor (%eax) # sched: [5:2.00]
 ; SANDY-NEXT:    fxsave (%eax) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fxrstor_fxsave:
 ; HASWELL:       # %bb.0:
@@ -5652,7 +5652,7 @@ define void @test_fxtract() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fxtract # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fxtract:
 ; HASWELL:       # %bb.0:
@@ -5726,7 +5726,7 @@ define void @test_fyl2x() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fyl2x # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fyl2x:
 ; HASWELL:       # %bb.0:
@@ -5800,7 +5800,7 @@ define void @test_fyl2xp1() optsize {
 ; SANDY-NEXT:    #APP
 ; SANDY-NEXT:    fyl2xp1 # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    retl # sched: [5:1.00]
+; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fyl2xp1:
 ; HASWELL:       # %bb.0:

Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=329388&r1=329387&r2=329388&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Fri Apr  6 04:00:51 2018
@@ -11,8 +11,8 @@ define void @test_vfrczpd(<2 x double> %
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfrczpd %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vfrczpd %ymm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT:    vfrczpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vfrczpd (%rsi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vfrczpd (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vfrczpd (%rsi), %ymm1 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -37,8 +37,8 @@ define void @test_vfrczps(<4 x float> %a
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfrczps %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vfrczps %ymm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT:    vfrczps (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vfrczps (%rsi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vfrczps (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vfrczps (%rsi), %ymm1 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -62,7 +62,7 @@ define void @test_vfrczsd(<2 x double> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfrczsd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vfrczsd (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vfrczsd (%rdi), %xmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -82,7 +82,7 @@ define void @test_vfrczss(<4 x float> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vfrczss %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vfrczss (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vfrczss (%rdi), %xmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -102,8 +102,8 @@ define void @test_vpcmov_128(<2 x i64> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -124,8 +124,8 @@ define void @test_vpcmov_256(<4 x i64> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -151,10 +151,10 @@ define void @test_vpcom(<2 x i64> %a0, <
 ; GENERIC-NEXT:    vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -183,10 +183,10 @@ define void @test_vpcomu(<2 x i64> %a0,
 ; GENERIC-NEXT:    vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -212,8 +212,8 @@ define void @test_vpermil2pd_128(<2 x do
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -234,8 +234,8 @@ define void @test_vpermil2pd_256(<4 x do
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -258,8 +258,8 @@ define void @test_vpermil2ps_128(<4 x fl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -280,8 +280,8 @@ define void @test_vpermil2ps_256(<8 x fl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -304,7 +304,7 @@ define void @test_vphaddbd(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddbd %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddbd (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddbd (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -324,7 +324,7 @@ define void @test_vphaddbq(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddbq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddbq (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddbq (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -344,7 +344,7 @@ define void @test_vphaddbw(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddbw %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddbw (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddbw (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -364,7 +364,7 @@ define void @test_vphadddq(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphadddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphadddq (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphadddq (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -384,7 +384,7 @@ define void @test_vphaddubd(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddubd %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddubd (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddubd (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -404,7 +404,7 @@ define void @test_vphaddubq(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddubq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddubq (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddubq (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -424,7 +424,7 @@ define void @test_vphaddubw(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddubw %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddubw (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddubw (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -444,7 +444,7 @@ define void @test_vphaddudq(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddudq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddudq (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddudq (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -464,7 +464,7 @@ define void @test_vphadduwd(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphadduwd %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphadduwd (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphadduwd (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -484,7 +484,7 @@ define void @test_vphadduwq(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphadduwq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphadduwq (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphadduwq (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -504,7 +504,7 @@ define void @test_vphaddwd(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddwd %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddwd (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddwd (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -524,7 +524,7 @@ define void @test_vphaddwq(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphaddwq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphaddwq (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphaddwq (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -544,7 +544,7 @@ define void @test_vphsubbw(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphsubbw %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphsubbw (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphsubbw (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -564,7 +564,7 @@ define void @test_vphsubdq(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphsubdq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphsubdq (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphsubdq (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -584,7 +584,7 @@ define void @test_vphsubwd(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vphsubwd %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vphsubwd (%rdi), %xmm0 # sched: [5:0.50]
+; GENERIC-NEXT:    vphsubwd (%rdi), %xmm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -604,7 +604,7 @@ define void @test_vpmacsdd(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -624,7 +624,7 @@ define void @test_vpmacsdqh(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -644,7 +644,7 @@ define void @test_vpmacsdql(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -664,7 +664,7 @@ define void @test_vpmacssdd(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -684,7 +684,7 @@ define void @test_vpmacssdqh(<2 x i64> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -704,7 +704,7 @@ define void @test_vpmacssdql(<2 x i64> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -724,7 +724,7 @@ define void @test_vpmacsswd(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -744,7 +744,7 @@ define void @test_vpmacssww(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -764,7 +764,7 @@ define void @test_vpmacswd(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -784,7 +784,7 @@ define void @test_vpmacsww(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -804,7 +804,7 @@ define void @test_vpmadcsswd(<2 x i64> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -824,7 +824,7 @@ define void @test_vpmadcswd(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -844,8 +844,8 @@ define void @test_vpperm(<2 x i64> %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -869,22 +869,22 @@ define void @test_vprot(<2 x i64> %a0, <
 ; GENERIC-NEXT:    vprotd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vprotq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vprotw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vprotb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotb %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotd %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotq %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotw %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vprotb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotb %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotd %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotq %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotw %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    vprotb $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vprotd $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vprotq $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vprotw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vprotb $7, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotd $7, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotq $7, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vprotw $7, (%rdi), %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vprotb $7, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotd $7, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotq $7, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vprotw $7, (%rdi), %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -925,14 +925,14 @@ define void @test_vpsha(<2 x i64> %a0, <
 ; GENERIC-NEXT:    vpshad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpshaq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpshaw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshab (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshad (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshaq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshaw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshab %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshad %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshaq %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshaw %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpshab (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshad (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshaq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshaw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshab %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshad %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshaq %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshaw %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -965,14 +965,14 @@ define void @test_vpshl(<2 x i64> %a0, <
 ; GENERIC-NEXT:    vpshld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpshlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpshlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpshlb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshld (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshlq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshlw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshlb %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshld %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshlq %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpshlw %xmm0, (%rdi), %xmm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpshlb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshld (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshlq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshlw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshlb %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshld %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshlq %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpshlw %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;




More information about the llvm-commits mailing list