[llvm] r324533 - [X86] When doing callee save/restore for k-registers make sure we don't use KMOVQ on non-BWI targets

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 7 13:41:50 PST 2018


Author: ctopper
Date: Wed Feb  7 13:41:50 2018
New Revision: 324533

URL: http://llvm.org/viewvc/llvm-project?rev=324533&view=rev
Log:
[X86] When doing callee save/restore for k-registers make sure we don't use KMOVQ on non-BWI targets

If we are saving/restoring k-registers, the default behavior of getMinimalPhysRegClass will find the VK64 class with a spill size of 64 bits. This will cause the KMOVQ opcode to be used for save/restore. If we don't have BWI instructions we need to constrain the class returned to give us VK16 with a 16-bit spill size. We can do this by passing either v16i1 or v64i1 into getMinimalPhysRegClass.

Also add asserts to make sure BWI is enabled anytime we use KMOVD/KMOVQ. These are what caught this bug.

Fixes PR36256

Differential Revision: https://reviews.llvm.org/D42989

Modified:
    llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll
    llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll

Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=324533&r1=324532&r2=324533&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Wed Feb  7 13:41:50 2018
@@ -1925,7 +1925,12 @@ bool X86FrameLowering::assignCalleeSaved
     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
       continue;
 
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
     unsigned Size = TRI->getSpillSize(*RC);
     unsigned Align = TRI->getSpillAlignment(*RC);
     // ensure alignment
@@ -1992,9 +1997,15 @@ bool X86FrameLowering::spillCalleeSavedR
     unsigned Reg = CSI[i-1].getReg();
     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
       continue;
+
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
 
     TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                             TRI);
@@ -2068,7 +2079,12 @@ bool X86FrameLowering::restoreCalleeSave
         X86::GR32RegClass.contains(Reg))
       continue;
 
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
     TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
   }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=324533&r1=324532&r2=324533&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Feb  7 13:41:50 2018
@@ -6919,8 +6919,10 @@ static unsigned getLoadStoreRegOpcode(un
         (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
     if (X86::RFP32RegClass.hasSubClassEq(RC))
       return load ? X86::LD_Fp32m : X86::ST_Fp32m;
-    if (X86::VK32RegClass.hasSubClassEq(RC))
+    if (X86::VK32RegClass.hasSubClassEq(RC)) {
+      assert(STI.hasBWI() && "KMOVD requires BWI");
       return load ? X86::KMOVDkm : X86::KMOVDmk;
+    }
     llvm_unreachable("Unknown 4-byte regclass");
   case 8:
     if (X86::GR64RegClass.hasSubClassEq(RC))
@@ -6933,8 +6935,10 @@ static unsigned getLoadStoreRegOpcode(un
       return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
     if (X86::RFP64RegClass.hasSubClassEq(RC))
       return load ? X86::LD_Fp64m : X86::ST_Fp64m;
-    if (X86::VK64RegClass.hasSubClassEq(RC))
+    if (X86::VK64RegClass.hasSubClassEq(RC)) {
+      assert(STI.hasBWI() && "KMOVQ requires BWI");
       return load ? X86::KMOVQkm : X86::KMOVQmk;
+    }
     llvm_unreachable("Unknown 8-byte regclass");
   case 10:
     assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");

Modified: llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll?rev=324533&r1=324532&r2=324533&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll Wed Feb  7 13:41:50 2018
@@ -181,111 +181,217 @@ define intel_ocl_bicc <16 x float> @test
 ; WIN32-NEXT:    calll _func_float16
 ; WIN32-NEXT:    retl
 ;
-; WIN64-LABEL: test_prolog_epilog:
-; WIN64:       # %bb.0:
-; WIN64-NEXT:    pushq %rbp
-; WIN64-NEXT:    subq $1328, %rsp # imm = 0x530
-; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
-; WIN64-NEXT:    kmovq %k7, 1192(%rbp) # 8-byte Spill
-; WIN64-NEXT:    kmovq %k6, 1184(%rbp) # 8-byte Spill
-; WIN64-NEXT:    kmovq %k5, 1176(%rbp) # 8-byte Spill
-; WIN64-NEXT:    kmovq %k4, 1168(%rbp) # 8-byte Spill
-; WIN64-NEXT:    vmovaps %zmm21, 1056(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm20, 960(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm19, 896(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm18, 832(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm17, 768(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm16, 704(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm15, 640(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm14, 576(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm13, 512(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm12, 448(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm11, 384(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm10, 320(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm9, 256(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm8, 192(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm7, 128(%rbp) # 64-byte Spill
-; WIN64-NEXT:    vmovaps %zmm6, 64(%rbp) # 64-byte Spill
-; WIN64-NEXT:    andq $-64, %rsp
-; WIN64-NEXT:    vmovaps %zmm1, {{[0-9]+}}(%rsp)
-; WIN64-NEXT:    vmovaps %zmm0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT:    callq func_float16
-; WIN64-NEXT:    vmovaps 64(%rbp), %zmm6 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 128(%rbp), %zmm7 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 192(%rbp), %zmm8 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 256(%rbp), %zmm9 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 320(%rbp), %zmm10 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 384(%rbp), %zmm11 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 448(%rbp), %zmm12 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 512(%rbp), %zmm13 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 576(%rbp), %zmm14 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 640(%rbp), %zmm15 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 704(%rbp), %zmm16 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 768(%rbp), %zmm17 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 832(%rbp), %zmm18 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 896(%rbp), %zmm19 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 960(%rbp), %zmm20 # 64-byte Reload
-; WIN64-NEXT:    vmovaps 1056(%rbp), %zmm21 # 64-byte Reload
-; WIN64-NEXT:    kmovq 1168(%rbp), %k4 # 8-byte Reload
-; WIN64-NEXT:    kmovq 1176(%rbp), %k5 # 8-byte Reload
-; WIN64-NEXT:    kmovq 1184(%rbp), %k6 # 8-byte Reload
-; WIN64-NEXT:    kmovq 1192(%rbp), %k7 # 8-byte Reload
-; WIN64-NEXT:    leaq 1200(%rbp), %rsp
-; WIN64-NEXT:    popq %rbp
-; WIN64-NEXT:    retq
+; WIN64-KNL-LABEL: test_prolog_epilog:
+; WIN64-KNL:       # %bb.0:
+; WIN64-KNL-NEXT:    pushq %rbp
+; WIN64-KNL-NEXT:    subq $1328, %rsp # imm = 0x530
+; WIN64-KNL-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-KNL-NEXT:    kmovw %k7, 1198(%rbp) # 2-byte Spill
+; WIN64-KNL-NEXT:    kmovw %k6, 1196(%rbp) # 2-byte Spill
+; WIN64-KNL-NEXT:    kmovw %k5, 1194(%rbp) # 2-byte Spill
+; WIN64-KNL-NEXT:    kmovw %k4, 1192(%rbp) # 2-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm21, 1104(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm20, 992(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm19, 896(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm18, 832(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm17, 768(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm16, 704(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm15, 640(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm14, 576(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm13, 512(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm12, 448(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm11, 384(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm10, 320(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm9, 256(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm8, 192(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm7, 128(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    vmovaps %zmm6, 64(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT:    andq $-64, %rsp
+; WIN64-KNL-NEXT:    vmovaps %zmm1, {{[0-9]+}}(%rsp)
+; WIN64-KNL-NEXT:    vmovaps %zmm0, {{[0-9]+}}(%rsp)
+; WIN64-KNL-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-KNL-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-KNL-NEXT:    callq func_float16
+; WIN64-KNL-NEXT:    vmovaps 64(%rbp), %zmm6 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 128(%rbp), %zmm7 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 192(%rbp), %zmm8 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 256(%rbp), %zmm9 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 320(%rbp), %zmm10 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 384(%rbp), %zmm11 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 448(%rbp), %zmm12 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 512(%rbp), %zmm13 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 576(%rbp), %zmm14 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 640(%rbp), %zmm15 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 704(%rbp), %zmm16 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 768(%rbp), %zmm17 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 832(%rbp), %zmm18 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 896(%rbp), %zmm19 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 992(%rbp), %zmm20 # 64-byte Reload
+; WIN64-KNL-NEXT:    vmovaps 1104(%rbp), %zmm21 # 64-byte Reload
+; WIN64-KNL-NEXT:    kmovw 1192(%rbp), %k4 # 2-byte Reload
+; WIN64-KNL-NEXT:    kmovw 1194(%rbp), %k5 # 2-byte Reload
+; WIN64-KNL-NEXT:    kmovw 1196(%rbp), %k6 # 2-byte Reload
+; WIN64-KNL-NEXT:    kmovw 1198(%rbp), %k7 # 2-byte Reload
+; WIN64-KNL-NEXT:    leaq 1200(%rbp), %rsp
+; WIN64-KNL-NEXT:    popq %rbp
+; WIN64-KNL-NEXT:    retq
 ;
-; X64-LABEL: test_prolog_epilog:
-; X64:       ## %bb.0:
-; X64-NEXT:    pushq %rsi
-; X64-NEXT:    pushq %rdi
-; X64-NEXT:    subq $1192, %rsp ## imm = 0x4A8
-; X64-NEXT:    kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; X64-NEXT:    kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; X64-NEXT:    kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; X64-NEXT:    kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; X64-NEXT:    vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; X64-NEXT:    vmovups %zmm16, (%rsp) ## 64-byte Spill
-; X64-NEXT:    callq _func_float16
-; X64-NEXT:    vmovups (%rsp), %zmm16 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
-; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
-; X64-NEXT:    kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
-; X64-NEXT:    kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
-; X64-NEXT:    kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
-; X64-NEXT:    kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
-; X64-NEXT:    addq $1192, %rsp ## imm = 0x4A8
-; X64-NEXT:    popq %rdi
-; X64-NEXT:    popq %rsi
-; X64-NEXT:    retq
+; WIN64-SKX-LABEL: test_prolog_epilog:
+; WIN64-SKX:       # %bb.0:
+; WIN64-SKX-NEXT:    pushq %rbp
+; WIN64-SKX-NEXT:    subq $1328, %rsp # imm = 0x530
+; WIN64-SKX-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-SKX-NEXT:    kmovq %k7, 1192(%rbp) # 8-byte Spill
+; WIN64-SKX-NEXT:    kmovq %k6, 1184(%rbp) # 8-byte Spill
+; WIN64-SKX-NEXT:    kmovq %k5, 1176(%rbp) # 8-byte Spill
+; WIN64-SKX-NEXT:    kmovq %k4, 1168(%rbp) # 8-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm21, 1056(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm20, 960(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm19, 896(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm18, 832(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm17, 768(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm16, 704(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm15, 640(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm14, 576(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm13, 512(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm12, 448(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm11, 384(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm10, 320(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm9, 256(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm8, 192(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm7, 128(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    vmovaps %zmm6, 64(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT:    andq $-64, %rsp
+; WIN64-SKX-NEXT:    vmovaps %zmm1, {{[0-9]+}}(%rsp)
+; WIN64-SKX-NEXT:    vmovaps %zmm0, {{[0-9]+}}(%rsp)
+; WIN64-SKX-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-SKX-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-SKX-NEXT:    callq func_float16
+; WIN64-SKX-NEXT:    vmovaps 64(%rbp), %zmm6 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 128(%rbp), %zmm7 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 192(%rbp), %zmm8 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 256(%rbp), %zmm9 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 320(%rbp), %zmm10 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 384(%rbp), %zmm11 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 448(%rbp), %zmm12 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 512(%rbp), %zmm13 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 576(%rbp), %zmm14 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 640(%rbp), %zmm15 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 704(%rbp), %zmm16 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 768(%rbp), %zmm17 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 832(%rbp), %zmm18 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 896(%rbp), %zmm19 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 960(%rbp), %zmm20 # 64-byte Reload
+; WIN64-SKX-NEXT:    vmovaps 1056(%rbp), %zmm21 # 64-byte Reload
+; WIN64-SKX-NEXT:    kmovq 1168(%rbp), %k4 # 8-byte Reload
+; WIN64-SKX-NEXT:    kmovq 1176(%rbp), %k5 # 8-byte Reload
+; WIN64-SKX-NEXT:    kmovq 1184(%rbp), %k6 # 8-byte Reload
+; WIN64-SKX-NEXT:    kmovq 1192(%rbp), %k7 # 8-byte Reload
+; WIN64-SKX-NEXT:    leaq 1200(%rbp), %rsp
+; WIN64-SKX-NEXT:    popq %rbp
+; WIN64-SKX-NEXT:    retq
+;
+; X64-KNL-LABEL: test_prolog_epilog:
+; X64-KNL:       ## %bb.0:
+; X64-KNL-NEXT:    pushq %rsi
+; X64-KNL-NEXT:    pushq %rdi
+; X64-KNL-NEXT:    subq $1064, %rsp ## imm = 0x428
+; X64-KNL-NEXT:    kmovw %k7, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; X64-KNL-NEXT:    kmovw %k6, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; X64-KNL-NEXT:    kmovw %k5, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; X64-KNL-NEXT:    kmovw %k4, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    vmovups %zmm16, (%rsp) ## 64-byte Spill
+; X64-KNL-NEXT:    callq _func_float16
+; X64-KNL-NEXT:    vmovups (%rsp), %zmm16 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
+; X64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
+; X64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k4 ## 2-byte Reload
+; X64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k5 ## 2-byte Reload
+; X64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k6 ## 2-byte Reload
+; X64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k7 ## 2-byte Reload
+; X64-KNL-NEXT:    addq $1064, %rsp ## imm = 0x428
+; X64-KNL-NEXT:    popq %rdi
+; X64-KNL-NEXT:    popq %rsi
+; X64-KNL-NEXT:    retq
+;
+; X64-SKX-LABEL: test_prolog_epilog:
+; X64-SKX:       ## %bb.0:
+; X64-SKX-NEXT:    pushq %rsi
+; X64-SKX-NEXT:    pushq %rdi
+; X64-SKX-NEXT:    subq $1192, %rsp ## imm = 0x4A8
+; X64-SKX-NEXT:    kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-SKX-NEXT:    kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-SKX-NEXT:    kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-SKX-NEXT:    kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    vmovups %zmm16, (%rsp) ## 64-byte Spill
+; X64-SKX-NEXT:    callq _func_float16
+; X64-SKX-NEXT:    vmovups (%rsp), %zmm16 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
+; X64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
+; X64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
+; X64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
+; X64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
+; X64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
+; X64-SKX-NEXT:    addq $1192, %rsp ## imm = 0x4A8
+; X64-SKX-NEXT:    popq %rdi
+; X64-SKX-NEXT:    popq %rsi
+; X64-SKX-NEXT:    retq
    %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
    ret <16 x float> %c
 }

Modified: llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll?rev=324533&r1=324532&r2=324533&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll Wed Feb  7 13:41:50 2018
@@ -27,26 +27,26 @@ define x86_intrcc void @foo(i8* %frame)
 ; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 72
 ; CHECK64-KNL-NEXT:    pushq %rcx ## encoding: [0x51]
 ; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 80
-; CHECK64-KNL-NEXT:    subq $2160, %rsp ## encoding: [0x48,0x81,0xec,0x70,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    ## imm = 0x870
-; CHECK64-KNL-NEXT:    kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq %k3, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq %k2, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq %k1, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    subq $2096, %rsp ## encoding: [0x48,0x81,0xec,0x30,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    ## imm = 0x830
+; CHECK64-KNL-NEXT:    kmovw %k7, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw %k6, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw %k5, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw %k4, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw %k3, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw %k2, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x08,0x00,0x00]
 ; CHECK64-KNL-NEXT:    vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x7c,0x24,0x1f]
 ; CHECK64-KNL-NEXT:    vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
 ; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
 ; CHECK64-KNL-NEXT:    vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
@@ -109,7 +109,7 @@ define x86_intrcc void @foo(i8* %frame)
 ; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
 ; CHECK64-KNL-NEXT:    vmovups %zmm0, (%rsp) ## 64-byte Spill
 ; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
-; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 2240
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 2176
 ; CHECK64-KNL-NEXT:    .cfi_offset %rcx, -80
 ; CHECK64-KNL-NEXT:    .cfi_offset %rdx, -72
 ; CHECK64-KNL-NEXT:    .cfi_offset %rsi, -64
@@ -119,46 +119,46 @@ define x86_intrcc void @foo(i8* %frame)
 ; CHECK64-KNL-NEXT:    .cfi_offset %r10, -32
 ; CHECK64-KNL-NEXT:    .cfi_offset %r11, -24
 ; CHECK64-KNL-NEXT:    .cfi_offset %rax, -16
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm0, -2240
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm1, -2176
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm2, -2112
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm3, -2048
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm4, -1984
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm5, -1920
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm6, -1856
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm7, -1792
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm8, -1728
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm9, -1664
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm10, -1600
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm11, -1536
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm12, -1472
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm13, -1408
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm14, -1344
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm15, -1280
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm16, -1216
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm17, -1152
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm18, -1088
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm19, -1024
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm20, -960
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm21, -896
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm22, -832
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm23, -768
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm24, -704
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm25, -640
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm26, -576
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm27, -512
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm28, -448
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm29, -384
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm30, -320
-; CHECK64-KNL-NEXT:    .cfi_offset %xmm31, -224
-; CHECK64-KNL-NEXT:    .cfi_offset %k0, -144
-; CHECK64-KNL-NEXT:    .cfi_offset %k1, -136
-; CHECK64-KNL-NEXT:    .cfi_offset %k2, -128
-; CHECK64-KNL-NEXT:    .cfi_offset %k3, -120
-; CHECK64-KNL-NEXT:    .cfi_offset %k4, -112
-; CHECK64-KNL-NEXT:    .cfi_offset %k5, -104
-; CHECK64-KNL-NEXT:    .cfi_offset %k6, -96
-; CHECK64-KNL-NEXT:    .cfi_offset %k7, -88
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm0, -2176
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm1, -2112
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm2, -2048
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm3, -1984
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm4, -1920
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm5, -1856
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm6, -1792
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm7, -1728
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm8, -1664
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm9, -1600
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm10, -1536
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm11, -1472
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm12, -1408
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm13, -1344
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm14, -1280
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm15, -1216
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm16, -1152
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm17, -1088
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm18, -1024
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm19, -960
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm20, -896
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm21, -832
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm22, -768
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm23, -704
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm24, -640
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm25, -576
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm26, -512
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm27, -448
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm28, -384
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm29, -320
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm30, -256
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm31, -192
+; CHECK64-KNL-NEXT:    .cfi_offset %k0, -96
+; CHECK64-KNL-NEXT:    .cfi_offset %k1, -94
+; CHECK64-KNL-NEXT:    .cfi_offset %k2, -92
+; CHECK64-KNL-NEXT:    .cfi_offset %k3, -90
+; CHECK64-KNL-NEXT:    .cfi_offset %k4, -88
+; CHECK64-KNL-NEXT:    .cfi_offset %k5, -86
+; CHECK64-KNL-NEXT:    .cfi_offset %k6, -84
+; CHECK64-KNL-NEXT:    .cfi_offset %k7, -82
 ; CHECK64-KNL-NEXT:    cld ## encoding: [0xfc]
 ; CHECK64-KNL-NEXT:    callq _bar ## encoding: [0xe8,A,A,A,A]
 ; CHECK64-KNL-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
@@ -225,25 +225,25 @@ define x86_intrcc void @foo(i8* %frame)
 ; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
 ; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
 ; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k2 ## 8-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k3 ## 8-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
-; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    addq $2160, %rsp ## encoding: [0x48,0x81,0xc4,0x70,0x08,0x00,0x00]
-; CHECK64-KNL-NEXT:    ## imm = 0x870
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x7c,0x24,0x1f]
+; CHECK64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2 ## 2-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k3 ## 2-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k4 ## 2-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k5 ## 2-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k6 ## 2-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k7 ## 2-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    addq $2096, %rsp ## encoding: [0x48,0x81,0xc4,0x30,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    ## imm = 0x830
 ; CHECK64-KNL-NEXT:    popq %rcx ## encoding: [0x59]
 ; CHECK64-KNL-NEXT:    popq %rdx ## encoding: [0x5a]
 ; CHECK64-KNL-NEXT:    popq %rsi ## encoding: [0x5e]
@@ -512,26 +512,26 @@ define x86_intrcc void @foo(i8* %frame)
 ; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 12
 ; CHECK32-KNL-NEXT:    pushl %eax ## encoding: [0x50]
 ; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 16
-; CHECK32-KNL-NEXT:    subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    ## imm = 0x270
-; CHECK32-KNL-NEXT:    kmovq %k7, {{[0-9]+}}(%esp) ## 8-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq %k6, {{[0-9]+}}(%esp) ## 8-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq %k5, {{[0-9]+}}(%esp) ## 8-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq %k4, {{[0-9]+}}(%esp) ## 8-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq %k3, {{[0-9]+}}(%esp) ## 8-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq %k2, {{[0-9]+}}(%esp) ## 8-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq %k1, {{[0-9]+}}(%esp) ## 8-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    subl $560, %esp ## encoding: [0x81,0xec,0x30,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## imm = 0x230
+; CHECK32-KNL-NEXT:    kmovw %k7, {{[0-9]+}}(%esp) ## 2-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw %k6, {{[0-9]+}}(%esp) ## 2-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw %k5, {{[0-9]+}}(%esp) ## 2-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw %k4, {{[0-9]+}}(%esp) ## 2-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw %k3, {{[0-9]+}}(%esp) ## 2-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw %k2, {{[0-9]+}}(%esp) ## 2-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw %k1, {{[0-9]+}}(%esp) ## 2-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%esp) ## 2-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x02,0x00,0x00]
 ; CHECK32-KNL-NEXT:    vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
 ; CHECK32-KNL-NEXT:    vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill
 ; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
 ; CHECK32-KNL-NEXT:    vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill
@@ -546,26 +546,26 @@ define x86_intrcc void @foo(i8* %frame)
 ; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
 ; CHECK32-KNL-NEXT:    vmovups %zmm0, (%esp) ## 64-byte Spill
 ; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
-; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 640
+; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 576
 ; CHECK32-KNL-NEXT:    .cfi_offset %eax, -16
 ; CHECK32-KNL-NEXT:    .cfi_offset %ecx, -12
 ; CHECK32-KNL-NEXT:    .cfi_offset %edx, -8
-; CHECK32-KNL-NEXT:    .cfi_offset %xmm0, -640
-; CHECK32-KNL-NEXT:    .cfi_offset %xmm1, -576
-; CHECK32-KNL-NEXT:    .cfi_offset %xmm2, -512
-; CHECK32-KNL-NEXT:    .cfi_offset %xmm3, -448
-; CHECK32-KNL-NEXT:    .cfi_offset %xmm4, -384
-; CHECK32-KNL-NEXT:    .cfi_offset %xmm5, -320
-; CHECK32-KNL-NEXT:    .cfi_offset %xmm6, -256
-; CHECK32-KNL-NEXT:    .cfi_offset %xmm7, -160
-; CHECK32-KNL-NEXT:    .cfi_offset %k0, -80
-; CHECK32-KNL-NEXT:    .cfi_offset %k1, -72
-; CHECK32-KNL-NEXT:    .cfi_offset %k2, -64
-; CHECK32-KNL-NEXT:    .cfi_offset %k3, -56
-; CHECK32-KNL-NEXT:    .cfi_offset %k4, -48
-; CHECK32-KNL-NEXT:    .cfi_offset %k5, -40
-; CHECK32-KNL-NEXT:    .cfi_offset %k6, -32
-; CHECK32-KNL-NEXT:    .cfi_offset %k7, -24
+; CHECK32-KNL-NEXT:    .cfi_offset %xmm0, -576
+; CHECK32-KNL-NEXT:    .cfi_offset %xmm1, -512
+; CHECK32-KNL-NEXT:    .cfi_offset %xmm2, -448
+; CHECK32-KNL-NEXT:    .cfi_offset %xmm3, -384
+; CHECK32-KNL-NEXT:    .cfi_offset %xmm4, -320
+; CHECK32-KNL-NEXT:    .cfi_offset %xmm5, -256
+; CHECK32-KNL-NEXT:    .cfi_offset %xmm6, -192
+; CHECK32-KNL-NEXT:    .cfi_offset %xmm7, -128
+; CHECK32-KNL-NEXT:    .cfi_offset %k0, -32
+; CHECK32-KNL-NEXT:    .cfi_offset %k1, -30
+; CHECK32-KNL-NEXT:    .cfi_offset %k2, -28
+; CHECK32-KNL-NEXT:    .cfi_offset %k3, -26
+; CHECK32-KNL-NEXT:    .cfi_offset %k4, -24
+; CHECK32-KNL-NEXT:    .cfi_offset %k5, -22
+; CHECK32-KNL-NEXT:    .cfi_offset %k6, -20
+; CHECK32-KNL-NEXT:    .cfi_offset %k7, -18
 ; CHECK32-KNL-NEXT:    cld ## encoding: [0xfc]
 ; CHECK32-KNL-NEXT:    calll _bar ## encoding: [0xe8,A,A,A,A]
 ; CHECK32-KNL-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
@@ -584,25 +584,25 @@ define x86_intrcc void @foo(i8* %frame)
 ; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload
 ; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
 ; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k2 ## 8-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k3 ## 8-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k4 ## 8-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k5 ## 8-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k6 ## 8-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k7 ## 8-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    ## imm = 0x270
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK32-KNL-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 ## 2-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## 2-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw {{[0-9]+}}(%esp), %k2 ## 2-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw {{[0-9]+}}(%esp), %k3 ## 2-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw {{[0-9]+}}(%esp), %k4 ## 2-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw {{[0-9]+}}(%esp), %k5 ## 2-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw {{[0-9]+}}(%esp), %k6 ## 2-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovw {{[0-9]+}}(%esp), %k7 ## 2-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    addl $560, %esp ## encoding: [0x81,0xc4,0x30,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## imm = 0x230
 ; CHECK32-KNL-NEXT:    popl %eax ## encoding: [0x58]
 ; CHECK32-KNL-NEXT:    popl %ecx ## encoding: [0x59]
 ; CHECK32-KNL-NEXT:    popl %edx ## encoding: [0x5a]




More information about the llvm-commits mailing list