[llvm] b7e110f - [X86] Align stack to 16-bytes on 32-bit with X86_INTR call convention

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 1 02:00:46 PDT 2023


Author: Antonio Abbatangelo
Date: 2023-06-01T17:00:34+08:00
New Revision: b7e110fcfe22a1f887507dbaa6fdb001630e223d

URL: https://github.com/llvm/llvm-project/commit/b7e110fcfe22a1f887507dbaa6fdb001630e223d
DIFF: https://github.com/llvm/llvm-project/commit/b7e110fcfe22a1f887507dbaa6fdb001630e223d.diff

LOG: [X86] Align stack to 16-bytes on 32-bit with X86_INTR call convention

Adds a dynamic stack alignment to functions under the interrupt call
convention on x86-32. This fixes the issue where the stack can be
misaligned on entry, since x86-32 makes no guarantees about the stack
pointer position when the interrupt service routine is called.

The alignment is done by overriding X86RegisterInfo::shouldRealignStack,
and by setting the correct alignment in X86FrameLowering::calculateMaxStackAlign.
This forces the interrupt handler to be dynamically aligned, generating
the appropriate `and` instruction in the prologue and `lea` in the
epilogue. The `no-realign-stack` attribute can be used as an opt-out.

Fixes #26851

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D151400

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86FrameLowering.cpp
    llvm/lib/Target/X86/X86RegisterInfo.cpp
    llvm/lib/Target/X86/X86RegisterInfo.h
    llvm/test/CodeGen/X86/x86-32-intrcc.ll
    llvm/test/CodeGen/X86/x86-interrupt_cc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 3870b430a46e2..a5a4f91299f3d 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1235,12 +1235,20 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
   Align StackAlign = getStackAlign();
-  if (MF.getFunction().hasFnAttribute("stackrealign")) {
+  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
+  if (HasRealign) {
     if (MFI.hasCalls())
       MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
     else if (MaxAlign < SlotSize)
       MaxAlign = Align(SlotSize);
   }
+
+  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
+    if (HasRealign)
+      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
+    else
+      MaxAlign = Align(16);
+  }
   return MaxAlign.value();
 }
 

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 0796ac65d7eec..bd29e9317ca5e 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -743,6 +743,13 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
   return true;
 }
 
+bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
+  if (TargetRegisterInfo::shouldRealignStack(MF))
+    return true;
+
+  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
+}
+
 // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
 // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
 // TODO: In this case we should be really trying first to entirely eliminate

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 48eeb72479f8c..da7b171e4cf6d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,8 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
+  bool shouldRealignStack(const MachineFunction &MF) const override;
+
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            unsigned FIOperandNum, Register BaseReg,
                            int FIOffset) const;

diff  --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 0f465761dd6ee..2e482753e2685 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -9,63 +9,86 @@
 
 
 ; Spills eax, putting original esp at +4.
-; No stack adjustment if declared with no error code
+; Stack is dynamically realigned to 16 bytes, and then reloaded to ebp - 4
+; With no error code, the stack is not incremented by 4 bytes before returning
 define x86_intrcc void @test_isr_no_ecode(ptr byval(%struct.interrupt_frame) %frame) nounwind {
 ; CHECK-LABEL: test_isr_no_ecode:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
-; CHECK-NEXT:    movl 12(%esp), %eax
+; CHECK-NEXT:    movl 12(%ebp), %eax
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    leal -4(%ebp), %esp
 ; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    iretl
 ;
 ; CHECK0-LABEL: test_isr_no_ecode:
 ; CHECK0:       # %bb.0:
+; CHECK0-NEXT:    pushl %ebp
+; CHECK0-NEXT:    movl %esp, %ebp
 ; CHECK0-NEXT:    pushl %eax
+; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
-; CHECK0-NEXT:    leal 4(%esp), %eax
+; CHECK0-NEXT:    leal 4(%ebp), %eax
 ; CHECK0-NEXT:    movl 8(%eax), %eax
 ; CHECK0-NEXT:    #APP
 ; CHECK0-NEXT:    #NO_APP
+; CHECK0-NEXT:    leal -4(%ebp), %esp
 ; CHECK0-NEXT:    popl %eax
+; CHECK0-NEXT:    popl %ebp
; CHECK0-NEXT:    iretl
  %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
   %flags = load i32, ptr %pflags, align 4
   call void asm sideeffect "", "r"(i32 %flags)
   ret void
 }
 
-; Spills eax and ecx, putting original esp at +8. Stack is adjusted up another 4 bytes
-; before return, popping the error code.
+; Spills eax and ecx, putting original esp at +8.
+; Stack is dynamically realigned to 16 bytes, and then reloaded to ebp - 8
+; Error code is popped from the stack with an increment of 4 before returning
 define x86_intrcc void @test_isr_ecode(ptr byval(%struct.interrupt_frame) %frame, i32 %ecode) nounwind {
 ; CHECK-LABEL: test_isr_ecode:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
-; CHECK-NEXT:    movl 8(%esp), %eax
-; CHECK-NEXT:    movl 20(%esp), %ecx
+; CHECK-NEXT:    movl 4(%ebp), %eax
+; CHECK-NEXT:    movl 16(%ebp), %ecx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    leal -8(%ebp), %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    popl %ecx
+; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    iretl
 ;
 ; CHECK0-LABEL: test_isr_ecode:
 ; CHECK0:       # %bb.0:
+; CHECK0-NEXT:    pushl %ebp
+; CHECK0-NEXT:    movl %esp, %ebp
 ; CHECK0-NEXT:    pushl %ecx
 ; CHECK0-NEXT:    pushl %eax
+; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
-; CHECK0-NEXT:    movl 8(%esp), %ecx
-; CHECK0-NEXT:    leal 12(%esp), %eax
+; CHECK0-NEXT:    movl 4(%ebp), %ecx
+; CHECK0-NEXT:    leal 8(%ebp), %eax
 ; CHECK0-NEXT:    movl 8(%eax), %eax
 ; CHECK0-NEXT:    #APP
 ; CHECK0-NEXT:    #NO_APP
+; CHECK0-NEXT:    leal -8(%ebp), %esp
 ; CHECK0-NEXT:    popl %eax
 ; CHECK0-NEXT:    popl %ecx
+; CHECK0-NEXT:    popl %ebp
 ; CHECK0-NEXT:    addl $4, %esp
 ; CHECK0-NEXT:    iretl
   %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
@@ -79,13 +102,18 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
 ; CHECK-LABEL: test_isr_clobbers:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    leal -12(%ebp), %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    popl %ecx
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    iretl
@@ -93,17 +121,22 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
 ; CHECK0-LABEL: test_isr_clobbers:
 ; CHECK0:       # %bb.0:
 ; CHECK0-NEXT:    pushl %ebp
+; CHECK0-NEXT:    movl %esp, %ebp
+; CHECK0-NEXT:    pushl %ecx
 ; CHECK0-NEXT:    pushl %ebx
 ; CHECK0-NEXT:    pushl %eax
+; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    #APP
 ; CHECK0-NEXT:    #NO_APP
+; CHECK0-NEXT:    leal -12(%ebp), %esp
 ; CHECK0-NEXT:    popl %eax
 ; CHECK0-NEXT:    popl %ebx
+; CHECK0-NEXT:    popl %ecx
 ; CHECK0-NEXT:    popl %ebp
 ; CHECK0-NEXT:    addl $4, %esp
 ; CHECK0-NEXT:    iretl
-  call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"()
+  call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{ebp}"()
   ret void
 }
 
@@ -113,20 +146,30 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
 define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame) nounwind {
 ; CHECK-LABEL: test_isr_x87:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
 ; CHECK-NEXT:    fldt f80
 ; CHECK-NEXT:    fld1
 ; CHECK-NEXT:    faddp %st, %st(1)
 ; CHECK-NEXT:    fstpt f80
+; CHECK-NEXT:    movl %ebp, %esp
+; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    iretl
 ;
 ; CHECK0-LABEL: test_isr_x87:
 ; CHECK0:       # %bb.0: # %entry
+; CHECK0-NEXT:    pushl %ebp
+; CHECK0-NEXT:    movl %esp, %ebp
+; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    fldt f80
 ; CHECK0-NEXT:    fld1
 ; CHECK0-NEXT:    faddp %st, %st(1)
 ; CHECK0-NEXT:    fstpt f80
+; CHECK0-NEXT:    movl %ebp, %esp
+; CHECK0-NEXT:    popl %ebp
 ; CHECK0-NEXT:    iretl
 entry:
   %ld = load x86_fp80, ptr @f80, align 4
@@ -135,8 +178,8 @@ entry:
   ret void
 }
 
-; Use a frame pointer to check the offsets. No return address, arguments start
-; at EBP+4.
+; Use the interrupt_frame pointer to check the offsets.
+; No return address, arguments start at EBP+4.
 define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %p) #0 {
 ; CHECK-LABEL: test_fp_1:
 ; CHECK:       # %bb.0: # %entry
@@ -144,11 +187,13 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
 ; CHECK-NEXT:    leal 20(%ebp), %eax
 ; CHECK-NEXT:    leal 4(%ebp), %ecx
 ; CHECK-NEXT:    movl %ecx, sink_address
 ; CHECK-NEXT:    movl %eax, sink_address
+; CHECK-NEXT:    leal -8(%ebp), %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    popl %ecx
 ; CHECK-NEXT:    popl %ebp
@@ -160,12 +205,14 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
 ; CHECK0-NEXT:    movl %esp, %ebp
 ; CHECK0-NEXT:    pushl %ecx
 ; CHECK0-NEXT:    pushl %eax
+; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    leal 4(%ebp), %ecx
 ; CHECK0-NEXT:    movl %ecx, %eax
 ; CHECK0-NEXT:    addl $16, %eax
 ; CHECK0-NEXT:    movl %ecx, sink_address
 ; CHECK0-NEXT:    movl %eax, sink_address
+; CHECK0-NEXT:    leal -8(%ebp), %esp
 ; CHECK0-NEXT:    popl %eax
 ; CHECK0-NEXT:    popl %ecx
 ; CHECK0-NEXT:    popl %ebp
@@ -186,6 +233,7 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
 ; CHECK-NEXT:    pushl %edx
 ; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
 ; CHECK-NEXT:    movl 4(%ebp), %eax
 ; CHECK-NEXT:    leal 24(%ebp), %ecx
@@ -193,6 +241,7 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
 ; CHECK-NEXT:    movl %edx, sink_address
 ; CHECK-NEXT:    movl %ecx, sink_address
 ; CHECK-NEXT:    movl %eax, sink_i32
+; CHECK-NEXT:    leal -12(%ebp), %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    popl %ecx
 ; CHECK-NEXT:    popl %edx
@@ -207,6 +256,7 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
 ; CHECK0-NEXT:    pushl %edx
 ; CHECK0-NEXT:    pushl %ecx
 ; CHECK0-NEXT:    pushl %eax
+; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    movl 4(%ebp), %eax
 ; CHECK0-NEXT:    leal 8(%ebp), %edx
@@ -215,6 +265,7 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
 ; CHECK0-NEXT:    movl %edx, sink_address
 ; CHECK0-NEXT:    movl %ecx, sink_address
 ; CHECK0-NEXT:    movl %eax, sink_i32
+; CHECK0-NEXT:    leal -12(%ebp), %esp
 ; CHECK0-NEXT:    popl %eax
 ; CHECK0-NEXT:    popl %ecx
 ; CHECK0-NEXT:    popl %edx
@@ -236,9 +287,11 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
 ; CHECK-NEXT:    leal 4(%ebp), %eax
 ; CHECK-NEXT:    movl %eax, sink_address
+; CHECK-NEXT:    leal -4(%ebp), %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    addl $4, %esp
@@ -249,10 +302,12 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
 ; CHECK0-NEXT:    pushl %ebp
 ; CHECK0-NEXT:    movl %esp, %ebp
 ; CHECK0-NEXT:    pushl %eax
+; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    movl 4(%ebp), %eax
 ; CHECK0-NEXT:    leal 4(%ebp), %eax
 ; CHECK0-NEXT:    movl %eax, sink_address
+; CHECK0-NEXT:    leal -4(%ebp), %esp
 ; CHECK0-NEXT:    popl %eax
 ; CHECK0-NEXT:    popl %ebp
 ; CHECK0-NEXT:    addl $4, %esp
@@ -264,4 +319,75 @@ entry:
   ret void
 }
 
+; Disabling dynamic realignment with attributes should work
+define x86_intrcc void @test_isr_no_realign(ptr byval(%struct.interrupt_frame) %frame) #1 {
+; CHECK-LABEL: test_isr_no_realign:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    cld
+; CHECK-NEXT:    movl 12(%esp), %eax
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    iretl
+;
+; CHECK0-LABEL: test_isr_no_realign:
+; CHECK0:       # %bb.0:
+; CHECK0-NEXT:    pushl %eax
+; CHECK0-NEXT:    cld
+; CHECK0-NEXT:    leal 4(%esp), %eax
+; CHECK0-NEXT:    movl 8(%eax), %eax
+; CHECK0-NEXT:    #APP
+; CHECK0-NEXT:    #NO_APP
+; CHECK0-NEXT:    popl %eax
+; CHECK0-NEXT:    iretl
+  %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
+  %flags = load i32, ptr %pflags, align 4
+  call void asm sideeffect "", "r"(i32 %flags)
+  ret void
+}
+
+; The stackrealign attribute should work, and the function's alignment
+; should be respected over the default 16-byte alignment required by the calling
+; convention.
+define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %frame, i32 %ecode) #2 {
+; CHECK-LABEL: test_isr_realign:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    andl $-32, %esp
+; CHECK-NEXT:    subl $32, %esp
+; CHECK-NEXT:    cld
+; CHECK-NEXT:    movl 4(%ebp), %eax
+; CHECK-NEXT:    movl %eax, (%esp)
+; CHECK-NEXT:    leal -4(%ebp), %esp
+; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    addl $4, %esp
+; CHECK-NEXT:    iretl
+;
+; CHECK0-LABEL: test_isr_realign:
+; CHECK0:       # %bb.0:
+; CHECK0-NEXT:    pushl %ebp
+; CHECK0-NEXT:    movl %esp, %ebp
+; CHECK0-NEXT:    pushl %eax
+; CHECK0-NEXT:    andl $-32, %esp
+; CHECK0-NEXT:    subl $32, %esp
+; CHECK0-NEXT:    cld
+; CHECK0-NEXT:    movl 4(%ebp), %eax
+; CHECK0-NEXT:    movl %eax, (%esp)
+; CHECK0-NEXT:    leal -4(%ebp), %esp
+; CHECK0-NEXT:    popl %eax
+; CHECK0-NEXT:    popl %ebp
+; CHECK0-NEXT:    addl $4, %esp
+; CHECK0-NEXT:    iretl
+  %ecode.stack = alloca i32, align 32
+  store i32 %ecode, ptr %ecode.stack
+  ret void
+}
+
+
 attributes #0 = { nounwind "frame-pointer"="all" }
+attributes #1 = { nounwind "no-realign-stack" }
+attributes #2 = { nounwind "stackrealign" }

diff  --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
index 56545f49ee543..cf8b7096816af 100644
--- a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
+++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
@@ -506,50 +506,52 @@ define x86_intrcc void @foo(ptr byval(i8) %frame) {
 ;
 ; CHECK32-KNL-LABEL: foo:
 ; CHECK32-KNL:       ## %bb.0:
-; CHECK32-KNL-NEXT:    pushl %edx ## encoding: [0x52]
+; CHECK32-KNL-NEXT:    pushl %ebp ## encoding: [0x55]
 ; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-KNL-NEXT:    .cfi_offset %ebp, -8
+; CHECK32-KNL-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
+; CHECK32-KNL-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK32-KNL-NEXT:    pushl %edx ## encoding: [0x52]
 ; CHECK32-KNL-NEXT:    pushl %ecx ## encoding: [0x51]
-; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 12
 ; CHECK32-KNL-NEXT:    pushl %eax ## encoding: [0x50]
-; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-KNL-NEXT:    andl $-16, %esp ## encoding: [0x83,0xe4,0xf0]
 ; CHECK32-KNL-NEXT:    subl $560, %esp ## encoding: [0x81,0xec,0x30,0x02,0x00,0x00]
 ; CHECK32-KNL-NEXT:    ## imm = 0x230
 ; CHECK32-KNL-NEXT:    kmovw %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x7d,0xf2]
 ; CHECK32-KNL-NEXT:    kmovw %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x75,0xf0]
 ; CHECK32-KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x6d,0xee]
 ; CHECK32-KNL-NEXT:    kmovw %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x65,0xec]
 ; CHECK32-KNL-NEXT:    kmovw %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x5d,0xea]
 ; CHECK32-KNL-NEXT:    kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x55,0xe8]
 ; CHECK32-KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x4d,0xe6]
 ; CHECK32-KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x45,0xe4]
 ; CHECK32-KNL-NEXT:    vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbd,0x88,0xff,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xb5,0x48,0xff,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xad,0x08,0xff,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xa5,0xc8,0xfe,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x9d,0x88,0xfe,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x95,0x48,0xfe,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
-; CHECK32-KNL-NEXT:    vmovups %zmm0, (%esp) ## 64-byte Spill
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
-; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 576
-; CHECK32-KNL-NEXT:    .cfi_offset %eax, -16
-; CHECK32-KNL-NEXT:    .cfi_offset %ecx, -12
-; CHECK32-KNL-NEXT:    .cfi_offset %edx, -8
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x8d,0x08,0xfe,0xff,0xff]
+; CHECK32-KNL-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x85,0xc8,0xfd,0xff,0xff]
+; CHECK32-KNL-NEXT:    .cfi_offset %eax, -20
+; CHECK32-KNL-NEXT:    .cfi_offset %ecx, -16
+; CHECK32-KNL-NEXT:    .cfi_offset %edx, -12
 ; CHECK32-KNL-NEXT:    .cfi_offset %xmm0, -576
 ; CHECK32-KNL-NEXT:    .cfi_offset %xmm1, -512
 ; CHECK32-KNL-NEXT:    .cfi_offset %xmm2, -448
@@ -558,102 +560,104 @@ define x86_intrcc void @foo(ptr byval(i8) %frame) {
 ; CHECK32-KNL-NEXT:    .cfi_offset %xmm5, -256
 ; CHECK32-KNL-NEXT:    .cfi_offset %xmm6, -192
 ; CHECK32-KNL-NEXT:    .cfi_offset %xmm7, -128
-; CHECK32-KNL-NEXT:    .cfi_offset %k0, -32
-; CHECK32-KNL-NEXT:    .cfi_offset %k1, -30
-; CHECK32-KNL-NEXT:    .cfi_offset %k2, -28
-; CHECK32-KNL-NEXT:    .cfi_offset %k3, -26
-; CHECK32-KNL-NEXT:    .cfi_offset %k4, -24
-; CHECK32-KNL-NEXT:    .cfi_offset %k5, -22
-; CHECK32-KNL-NEXT:    .cfi_offset %k6, -20
-; CHECK32-KNL-NEXT:    .cfi_offset %k7, -18
+; CHECK32-KNL-NEXT:    .cfi_offset %k0, -36
+; CHECK32-KNL-NEXT:    .cfi_offset %k1, -34
+; CHECK32-KNL-NEXT:    .cfi_offset %k2, -32
+; CHECK32-KNL-NEXT:    .cfi_offset %k3, -30
+; CHECK32-KNL-NEXT:    .cfi_offset %k4, -28
+; CHECK32-KNL-NEXT:    .cfi_offset %k5, -26
+; CHECK32-KNL-NEXT:    .cfi_offset %k6, -24
+; CHECK32-KNL-NEXT:    .cfi_offset %k7, -22
 ; CHECK32-KNL-NEXT:    cld ## encoding: [0xfc]
 ; CHECK32-KNL-NEXT:    calll _bar ## encoding: [0xe8,A,A,A,A]
 ; CHECK32-KNL-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
-; CHECK32-KNL-NEXT:    vmovups (%esp), %zmm0 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm0 ## 64-byte Reload
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x85,0xc8,0xfd,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x8d,0x08,0xfe,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x95,0x48,0xfe,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x9d,0x88,0xfe,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xa5,0xc8,0xfe,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xad,0x08,0xff,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xb5,0x48,0xff,0xff,0xff]
 ; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbd,0x88,0xff,0xff,0xff]
 ; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x45,0xe4]
 ; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x4d,0xe6]
 ; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x55,0xe8]
 ; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x5d,0xea]
 ; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x65,0xec]
 ; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x6d,0xee]
 ; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x75,0xf0]
 ; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
-; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    addl $560, %esp ## encoding: [0x81,0xc4,0x30,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT:    ## imm = 0x230
+; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x7d,0xf2]
+; CHECK32-KNL-NEXT:    leal -12(%ebp), %esp ## encoding: [0x8d,0x65,0xf4]
 ; CHECK32-KNL-NEXT:    popl %eax ## encoding: [0x58]
 ; CHECK32-KNL-NEXT:    popl %ecx ## encoding: [0x59]
 ; CHECK32-KNL-NEXT:    popl %edx ## encoding: [0x5a]
+; CHECK32-KNL-NEXT:    popl %ebp ## encoding: [0x5d]
 ; CHECK32-KNL-NEXT:    iretl ## encoding: [0xcf]
 ;
 ; CHECK32-SKX-LABEL: foo:
 ; CHECK32-SKX:       ## %bb.0:
-; CHECK32-SKX-NEXT:    pushl %edx ## encoding: [0x52]
+; CHECK32-SKX-NEXT:    pushl %ebp ## encoding: [0x55]
 ; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-SKX-NEXT:    .cfi_offset %ebp, -8
+; CHECK32-SKX-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
+; CHECK32-SKX-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK32-SKX-NEXT:    pushl %edx ## encoding: [0x52]
 ; CHECK32-SKX-NEXT:    pushl %ecx ## encoding: [0x51]
-; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 12
 ; CHECK32-SKX-NEXT:    pushl %eax ## encoding: [0x50]
-; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-SKX-NEXT:    andl $-16, %esp ## encoding: [0x83,0xe4,0xf0]
 ; CHECK32-SKX-NEXT:    subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00]
 ; CHECK32-SKX-NEXT:    ## imm = 0x270
 ; CHECK32-SKX-NEXT:    kmovq %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x7d,0xe8]
 ; CHECK32-SKX-NEXT:    kmovq %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x75,0xe0]
 ; CHECK32-SKX-NEXT:    kmovq %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x6d,0xd8]
 ; CHECK32-SKX-NEXT:    kmovq %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x65,0xd0]
 ; CHECK32-SKX-NEXT:    kmovq %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x5d,0xc8]
 ; CHECK32-SKX-NEXT:    kmovq %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x55,0xc0]
 ; CHECK32-SKX-NEXT:    kmovq %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x4d,0xb8]
 ; CHECK32-SKX-NEXT:    kmovq %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x45,0xb0]
 ; CHECK32-SKX-NEXT:    vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbd,0x48,0xff,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xb5,0x08,0xff,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xad,0xc8,0xfe,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xa5,0x88,0xfe,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x9d,0x48,0xfe,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x95,0x08,0xfe,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
-; CHECK32-SKX-NEXT:    vmovups %zmm0, (%esp) ## 64-byte Spill
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
-; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 640
-; CHECK32-SKX-NEXT:    .cfi_offset %eax, -16
-; CHECK32-SKX-NEXT:    .cfi_offset %ecx, -12
-; CHECK32-SKX-NEXT:    .cfi_offset %edx, -8
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x8d,0xc8,0xfd,0xff,0xff]
+; CHECK32-SKX-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x85,0x88,0xfd,0xff,0xff]
+; CHECK32-SKX-NEXT:    .cfi_offset %eax, -20
+; CHECK32-SKX-NEXT:    .cfi_offset %ecx, -16
+; CHECK32-SKX-NEXT:    .cfi_offset %edx, -12
 ; CHECK32-SKX-NEXT:    .cfi_offset %xmm0, -640
 ; CHECK32-SKX-NEXT:    .cfi_offset %xmm1, -576
 ; CHECK32-SKX-NEXT:    .cfi_offset %xmm2, -512
@@ -662,55 +666,55 @@ define x86_intrcc void @foo(ptr byval(i8) %frame) {
 ; CHECK32-SKX-NEXT:    .cfi_offset %xmm5, -320
 ; CHECK32-SKX-NEXT:    .cfi_offset %xmm6, -256
 ; CHECK32-SKX-NEXT:    .cfi_offset %xmm7, -192
-; CHECK32-SKX-NEXT:    .cfi_offset %k0, -80
-; CHECK32-SKX-NEXT:    .cfi_offset %k1, -72
-; CHECK32-SKX-NEXT:    .cfi_offset %k2, -64
-; CHECK32-SKX-NEXT:    .cfi_offset %k3, -56
-; CHECK32-SKX-NEXT:    .cfi_offset %k4, -48
-; CHECK32-SKX-NEXT:    .cfi_offset %k5, -40
-; CHECK32-SKX-NEXT:    .cfi_offset %k6, -32
-; CHECK32-SKX-NEXT:    .cfi_offset %k7, -24
+; CHECK32-SKX-NEXT:    .cfi_offset %k0, -88
+; CHECK32-SKX-NEXT:    .cfi_offset %k1, -80
+; CHECK32-SKX-NEXT:    .cfi_offset %k2, -72
+; CHECK32-SKX-NEXT:    .cfi_offset %k3, -64
+; CHECK32-SKX-NEXT:    .cfi_offset %k4, -56
+; CHECK32-SKX-NEXT:    .cfi_offset %k5, -48
+; CHECK32-SKX-NEXT:    .cfi_offset %k6, -40
+; CHECK32-SKX-NEXT:    .cfi_offset %k7, -32
 ; CHECK32-SKX-NEXT:    cld ## encoding: [0xfc]
 ; CHECK32-SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; CHECK32-SKX-NEXT:    calll _bar ## encoding: [0xe8,A,A,A,A]
 ; CHECK32-SKX-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
-; CHECK32-SKX-NEXT:    vmovups (%esp), %zmm0 ## 64-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm0 ## 64-byte Reload
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x85,0x88,0xfd,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x8d,0xc8,0xfd,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x95,0x08,0xfe,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x9d,0x48,0xfe,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xa5,0x88,0xfe,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xad,0xc8,0xfe,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xb5,0x08,0xff,0xff,0xff]
 ; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbd,0x48,0xff,0xff,0xff]
 ; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 8-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x45,0xb0]
 ; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 8-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x4d,0xb8]
 ; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 8-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x55,0xc0]
 ; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 8-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x5d,0xc8]
 ; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 8-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x65,0xd0]
 ; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 8-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x6d,0xd8]
 ; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 8-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x75,0xe0]
 ; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 8-byte Reload
-; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00]
-; CHECK32-SKX-NEXT:    addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00]
-; CHECK32-SKX-NEXT:    ## imm = 0x270
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x7d,0xe8]
+; CHECK32-SKX-NEXT:    leal -12(%ebp), %esp ## encoding: [0x8d,0x65,0xf4]
 ; CHECK32-SKX-NEXT:    popl %eax ## encoding: [0x58]
 ; CHECK32-SKX-NEXT:    popl %ecx ## encoding: [0x59]
 ; CHECK32-SKX-NEXT:    popl %edx ## encoding: [0x5a]
+; CHECK32-SKX-NEXT:    popl %ebp ## encoding: [0x5d]
 ; CHECK32-SKX-NEXT:    iretl ## encoding: [0xcf]
   call void @bar()
   ret void


        


More information about the llvm-commits mailing list