[llvm] b7e110f - [X86] Align stack to 16-bytes on 32-bit with X86_INTR call convention
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 1 02:00:46 PDT 2023
Author: Antonio Abbatangelo
Date: 2023-06-01T17:00:34+08:00
New Revision: b7e110fcfe22a1f887507dbaa6fdb001630e223d
URL: https://github.com/llvm/llvm-project/commit/b7e110fcfe22a1f887507dbaa6fdb001630e223d
DIFF: https://github.com/llvm/llvm-project/commit/b7e110fcfe22a1f887507dbaa6fdb001630e223d.diff
LOG: [X86] Align stack to 16-bytes on 32-bit with X86_INTR call convention
Adds dynamic stack realignment to functions using the interrupt calling
convention on x86-32. This fixes the issue where the stack can be
misaligned on entry, since x86-32 makes no guarantees about the stack
pointer position when an interrupt service routine is invoked.
The realignment is done by overriding X86RegisterInfo::shouldRealignStack
and by setting the correct alignment in X86FrameLowering::calculateMaxStackAlign.
This forces interrupt handlers to be dynamically realigned, generating
the appropriate `and` instruction in the prologue and `lea` in the
epilogue. The `no-realign-stack` attribute can be used as an opt-out.
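As a minimal sketch of the effect (assuming the three-i32 %struct.interrupt_frame
layout used by the test file, which is not shown in this diff; @isr is a
hypothetical handler name used only for illustration), an x86-32 handler like

  %struct.interrupt_frame = type { i32, i32, i32 }

  ; hypothetical handler, for illustration only
  define x86_intrcc void @isr(ptr byval(%struct.interrupt_frame) %frame) nounwind {
    ret void
  }

is now expected to be emitted with a frame-pointer-based realignment sequence,
roughly:

  pushl %ebp
  movl  %esp, %ebp
  andl  $-16, %esp
  # ...handler body...
  movl  %ebp, %esp        # or an equivalent `lea` when registers were spilled
  popl  %ebp
  iretl

Adding the `no-realign-stack` attribute keeps the previous, unaligned prologue.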
Fixes #26851
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D151400
Added:
Modified:
llvm/lib/Target/X86/X86FrameLowering.cpp
llvm/lib/Target/X86/X86RegisterInfo.cpp
llvm/lib/Target/X86/X86RegisterInfo.h
llvm/test/CodeGen/X86/x86-32-intrcc.ll
llvm/test/CodeGen/X86/x86-interrupt_cc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 3870b430a46e2..a5a4f91299f3d 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1235,12 +1235,20 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con
const MachineFrameInfo &MFI = MF.getFrameInfo();
Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
Align StackAlign = getStackAlign();
- if (MF.getFunction().hasFnAttribute("stackrealign")) {
+ bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
+ if (HasRealign) {
if (MFI.hasCalls())
MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
else if (MaxAlign < SlotSize)
MaxAlign = Align(SlotSize);
}
+
+ if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
+ if (HasRealign)
+ MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
+ else
+ MaxAlign = Align(16);
+ }
return MaxAlign.value();
}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 0796ac65d7eec..bd29e9317ca5e 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -743,6 +743,13 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
return true;
}
+bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
+ if (TargetRegisterInfo::shouldRealignStack(MF))
+ return true;
+
+ return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
+}
+
// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should be really trying first to entirely eliminate
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 48eeb72479f8c..da7b171e4cf6d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,8 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
bool canRealignStack(const MachineFunction &MF) const override;
+ bool shouldRealignStack(const MachineFunction &MF) const override;
+
void eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned FIOperandNum, Register BaseReg,
int FIOffset) const;
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 0f465761dd6ee..2e482753e2685 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -9,63 +9,86 @@
; Spills eax, putting original esp at +4.
-; No stack adjustment if declared with no error code
+; Stack is dynamically realigned to 16 bytes, and then reloaded to ebp - 4
+; With no error code, the stack is not incremented by 4 bytes before returning
define x86_intrcc void @test_isr_no_ecode(ptr byval(%struct.interrupt_frame) %frame) nounwind {
; CHECK-LABEL: test_isr_no_ecode:
; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
-; CHECK-NEXT: movl 12(%esp), %eax
+; CHECK-NEXT: movl 12(%ebp), %eax
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: leal -4(%ebp), %esp
; CHECK-NEXT: popl %eax
+; CHECK-NEXT: popl %ebp
; CHECK-NEXT: iretl
;
; CHECK0-LABEL: test_isr_no_ecode:
; CHECK0: # %bb.0:
+; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
-; CHECK0-NEXT: leal 4(%esp), %eax
+; CHECK0-NEXT: leal 4(%ebp), %eax
; CHECK0-NEXT: movl 8(%eax), %eax
; CHECK0-NEXT: #APP
; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: leal -4(%ebp), %esp
; CHECK0-NEXT: popl %eax
+; CHECK0-NEXT: popl %ebp
; CHECK0-NEXT: iretl
%pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
%flags = load i32, ptr %pflags, align 4
call void asm sideeffect "", "r"(i32 %flags)
ret void
}
-; Spills eax and ecx, putting original esp at +8. Stack is adjusted up another 4 bytes
-; before return, popping the error code.
+; Spills eax and ecx, putting original esp at +8.
+; Stack is dynamically realigned to 16 bytes, and then reloaded to ebp - 8
+; Error code is popped from the stack with an increment of 4 before returning
define x86_intrcc void @test_isr_ecode(ptr byval(%struct.interrupt_frame) %frame, i32 %ecode) nounwind {
; CHECK-LABEL: test_isr_ecode:
; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
-; CHECK-NEXT: movl 8(%esp), %eax
-; CHECK-NEXT: movl 20(%esp), %ecx
+; CHECK-NEXT: movl 4(%ebp), %eax
+; CHECK-NEXT: movl 16(%ebp), %ecx
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: leal -8(%ebp), %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %ebp
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: iretl
;
; CHECK0-LABEL: test_isr_ecode:
; CHECK0: # %bb.0:
+; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
-; CHECK0-NEXT: movl 8(%esp), %ecx
-; CHECK0-NEXT: leal 12(%esp), %eax
+; CHECK0-NEXT: movl 4(%ebp), %ecx
+; CHECK0-NEXT: leal 8(%ebp), %eax
; CHECK0-NEXT: movl 8(%eax), %eax
; CHECK0-NEXT: #APP
; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: leal -8(%ebp), %esp
; CHECK0-NEXT: popl %eax
; CHECK0-NEXT: popl %ecx
+; CHECK0-NEXT: popl %ebp
; CHECK0-NEXT: addl $4, %esp
; CHECK0-NEXT: iretl
%pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
@@ -79,13 +102,18 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
; CHECK-LABEL: test_isr_clobbers:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: leal -12(%ebp), %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: popl %ecx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: addl $4, %esp
; CHECK-NEXT: iretl
@@ -93,17 +121,22 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
; CHECK0-LABEL: test_isr_clobbers:
; CHECK0: # %bb.0:
; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
+; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %ebx
; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: #APP
; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: leal -12(%ebp), %esp
; CHECK0-NEXT: popl %eax
; CHECK0-NEXT: popl %ebx
+; CHECK0-NEXT: popl %ecx
; CHECK0-NEXT: popl %ebp
; CHECK0-NEXT: addl $4, %esp
; CHECK0-NEXT: iretl
- call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"()
+ call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{ebp}"()
ret void
}
@@ -113,20 +146,30 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame) nounwind {
; CHECK-LABEL: test_isr_x87:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: fldt f80
; CHECK-NEXT: fld1
; CHECK-NEXT: faddp %st, %st(1)
; CHECK-NEXT: fstpt f80
+; CHECK-NEXT: movl %ebp, %esp
+; CHECK-NEXT: popl %ebp
; CHECK-NEXT: iretl
;
; CHECK0-LABEL: test_isr_x87:
; CHECK0: # %bb.0: # %entry
+; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
+; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: fldt f80
; CHECK0-NEXT: fld1
; CHECK0-NEXT: faddp %st, %st(1)
; CHECK0-NEXT: fstpt f80
+; CHECK0-NEXT: movl %ebp, %esp
+; CHECK0-NEXT: popl %ebp
; CHECK0-NEXT: iretl
entry:
%ld = load x86_fp80, ptr @f80, align 4
@@ -135,8 +178,8 @@ entry:
ret void
}
-; Use a frame pointer to check the offsets. No return address, arguments start
-; at EBP+4.
+; Use the interrupt_frame pointer to check the offsets.
+; No return address, arguments start at EBP+4.
define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %p) #0 {
; CHECK-LABEL: test_fp_1:
; CHECK: # %bb.0: # %entry
@@ -144,11 +187,13 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: leal 20(%ebp), %eax
; CHECK-NEXT: leal 4(%ebp), %ecx
; CHECK-NEXT: movl %ecx, sink_address
; CHECK-NEXT: movl %eax, sink_address
+; CHECK-NEXT: leal -8(%ebp), %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: popl %ecx
; CHECK-NEXT: popl %ebp
@@ -160,12 +205,14 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: leal 4(%ebp), %ecx
; CHECK0-NEXT: movl %ecx, %eax
; CHECK0-NEXT: addl $16, %eax
; CHECK0-NEXT: movl %ecx, sink_address
; CHECK0-NEXT: movl %eax, sink_address
+; CHECK0-NEXT: leal -8(%ebp), %esp
; CHECK0-NEXT: popl %eax
; CHECK0-NEXT: popl %ecx
; CHECK0-NEXT: popl %ebp
@@ -186,6 +233,7 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushl %edx
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: movl 4(%ebp), %eax
; CHECK-NEXT: leal 24(%ebp), %ecx
@@ -193,6 +241,7 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: movl %edx, sink_address
; CHECK-NEXT: movl %ecx, sink_address
; CHECK-NEXT: movl %eax, sink_i32
+; CHECK-NEXT: leal -12(%ebp), %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: popl %ecx
; CHECK-NEXT: popl %edx
@@ -207,6 +256,7 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: pushl %edx
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: leal 8(%ebp), %edx
@@ -215,6 +265,7 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: movl %edx, sink_address
; CHECK0-NEXT: movl %ecx, sink_address
; CHECK0-NEXT: movl %eax, sink_i32
+; CHECK0-NEXT: leal -12(%ebp), %esp
; CHECK0-NEXT: popl %eax
; CHECK0-NEXT: popl %ecx
; CHECK0-NEXT: popl %edx
@@ -236,9 +287,11 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: leal 4(%ebp), %eax
; CHECK-NEXT: movl %eax, sink_address
+; CHECK-NEXT: leal -4(%ebp), %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: addl $4, %esp
@@ -249,10 +302,12 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK0-NEXT: pushl %ebp
; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: leal 4(%ebp), %eax
; CHECK0-NEXT: movl %eax, sink_address
+; CHECK0-NEXT: leal -4(%ebp), %esp
; CHECK0-NEXT: popl %eax
; CHECK0-NEXT: popl %ebp
; CHECK0-NEXT: addl $4, %esp
@@ -264,4 +319,75 @@ entry:
ret void
}
+; Disabling dynamic realignment with attributes should work
+define x86_intrcc void @test_isr_no_realign(ptr byval(%struct.interrupt_frame) %frame) #1 {
+; CHECK-LABEL: test_isr_no_realign:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: cld
+; CHECK-NEXT: movl 12(%esp), %eax
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: iretl
+;
+; CHECK0-LABEL: test_isr_no_realign:
+; CHECK0: # %bb.0:
+; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: cld
+; CHECK0-NEXT: leal 4(%esp), %eax
+; CHECK0-NEXT: movl 8(%eax), %eax
+; CHECK0-NEXT: #APP
+; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: popl %eax
+; CHECK0-NEXT: iretl
+ %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
+ %flags = load i32, ptr %pflags, align 4
+ call void asm sideeffect "", "r"(i32 %flags)
+ ret void
+}
+
+; The stackrealign attribute should work, and the function's alignment
+; should be respected over the default 16-byte alignment required by the calling
+; convention.
+define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %frame, i32 %ecode) #2 {
+; CHECK-LABEL: test_isr_realign:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-32, %esp
+; CHECK-NEXT: subl $32, %esp
+; CHECK-NEXT: cld
+; CHECK-NEXT: movl 4(%ebp), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: leal -4(%ebp), %esp
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: iretl
+;
+; CHECK0-LABEL: test_isr_realign:
+; CHECK0: # %bb.0:
+; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
+; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-32, %esp
+; CHECK0-NEXT: subl $32, %esp
+; CHECK0-NEXT: cld
+; CHECK0-NEXT: movl 4(%ebp), %eax
+; CHECK0-NEXT: movl %eax, (%esp)
+; CHECK0-NEXT: leal -4(%ebp), %esp
+; CHECK0-NEXT: popl %eax
+; CHECK0-NEXT: popl %ebp
+; CHECK0-NEXT: addl $4, %esp
+; CHECK0-NEXT: iretl
+ %ecode.stack = alloca i32, align 32
+ store i32 %ecode, ptr %ecode.stack
+ ret void
+}
+
+
attributes #0 = { nounwind "frame-pointer"="all" }
+attributes #1 = { nounwind "no-realign-stack" }
+attributes #2 = { nounwind "stackrealign" }
diff --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
index 56545f49ee543..cf8b7096816af 100644
--- a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
+++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
@@ -506,50 +506,52 @@ define x86_intrcc void @foo(ptr byval(i8) %frame) {
;
; CHECK32-KNL-LABEL: foo:
; CHECK32-KNL: ## %bb.0:
-; CHECK32-KNL-NEXT: pushl %edx ## encoding: [0x52]
+; CHECK32-KNL-NEXT: pushl %ebp ## encoding: [0x55]
; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-KNL-NEXT: .cfi_offset %ebp, -8
+; CHECK32-KNL-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5]
+; CHECK32-KNL-NEXT: .cfi_def_cfa_register %ebp
+; CHECK32-KNL-NEXT: pushl %edx ## encoding: [0x52]
; CHECK32-KNL-NEXT: pushl %ecx ## encoding: [0x51]
-; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 12
; CHECK32-KNL-NEXT: pushl %eax ## encoding: [0x50]
-; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-KNL-NEXT: andl $-16, %esp ## encoding: [0x83,0xe4,0xf0]
; CHECK32-KNL-NEXT: subl $560, %esp ## encoding: [0x81,0xec,0x30,0x02,0x00,0x00]
; CHECK32-KNL-NEXT: ## imm = 0x230
; CHECK32-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x7d,0xf2]
; CHECK32-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x75,0xf0]
; CHECK32-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x6d,0xee]
; CHECK32-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x65,0xec]
; CHECK32-KNL-NEXT: kmovw %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x5d,0xea]
; CHECK32-KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x55,0xe8]
; CHECK32-KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x4d,0xe6]
; CHECK32-KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x45,0xe4]
; CHECK32-KNL-NEXT: vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbd,0x88,0xff,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xb5,0x48,0xff,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xad,0x08,0xff,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xa5,0xc8,0xfe,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x9d,0x88,0xfe,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x95,0x48,0xfe,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
-; CHECK32-KNL-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
-; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 576
-; CHECK32-KNL-NEXT: .cfi_offset %eax, -16
-; CHECK32-KNL-NEXT: .cfi_offset %ecx, -12
-; CHECK32-KNL-NEXT: .cfi_offset %edx, -8
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x8d,0x08,0xfe,0xff,0xff]
+; CHECK32-KNL-NEXT: vmovups %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x85,0xc8,0xfd,0xff,0xff]
+; CHECK32-KNL-NEXT: .cfi_offset %eax, -20
+; CHECK32-KNL-NEXT: .cfi_offset %ecx, -16
+; CHECK32-KNL-NEXT: .cfi_offset %edx, -12
; CHECK32-KNL-NEXT: .cfi_offset %xmm0, -576
; CHECK32-KNL-NEXT: .cfi_offset %xmm1, -512
; CHECK32-KNL-NEXT: .cfi_offset %xmm2, -448
@@ -558,102 +560,104 @@ define x86_intrcc void @foo(ptr byval(i8) %frame) {
; CHECK32-KNL-NEXT: .cfi_offset %xmm5, -256
; CHECK32-KNL-NEXT: .cfi_offset %xmm6, -192
; CHECK32-KNL-NEXT: .cfi_offset %xmm7, -128
-; CHECK32-KNL-NEXT: .cfi_offset %k0, -32
-; CHECK32-KNL-NEXT: .cfi_offset %k1, -30
-; CHECK32-KNL-NEXT: .cfi_offset %k2, -28
-; CHECK32-KNL-NEXT: .cfi_offset %k3, -26
-; CHECK32-KNL-NEXT: .cfi_offset %k4, -24
-; CHECK32-KNL-NEXT: .cfi_offset %k5, -22
-; CHECK32-KNL-NEXT: .cfi_offset %k6, -20
-; CHECK32-KNL-NEXT: .cfi_offset %k7, -18
+; CHECK32-KNL-NEXT: .cfi_offset %k0, -36
+; CHECK32-KNL-NEXT: .cfi_offset %k1, -34
+; CHECK32-KNL-NEXT: .cfi_offset %k2, -32
+; CHECK32-KNL-NEXT: .cfi_offset %k3, -30
+; CHECK32-KNL-NEXT: .cfi_offset %k4, -28
+; CHECK32-KNL-NEXT: .cfi_offset %k5, -26
+; CHECK32-KNL-NEXT: .cfi_offset %k6, -24
+; CHECK32-KNL-NEXT: .cfi_offset %k7, -22
; CHECK32-KNL-NEXT: cld ## encoding: [0xfc]
; CHECK32-KNL-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A]
; CHECK32-KNL-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
-; CHECK32-KNL-NEXT: vmovups (%esp), %zmm0 ## 64-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm0 ## 64-byte Reload
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x85,0xc8,0xfd,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x8d,0x08,0xfe,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x95,0x48,0xfe,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x9d,0x88,0xfe,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xa5,0xc8,0xfe,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xad,0x08,0xff,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xb5,0x48,0xff,0xff,0xff]
; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbd,0x88,0xff,0xff,0xff]
; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x45,0xe4]
; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x4d,0xe6]
; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x55,0xe8]
; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x5d,0xea]
; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x65,0xec]
; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x6d,0xee]
; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x75,0xf0]
; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
-; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT: addl $560, %esp ## encoding: [0x81,0xc4,0x30,0x02,0x00,0x00]
-; CHECK32-KNL-NEXT: ## imm = 0x230
+; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x7d,0xf2]
+; CHECK32-KNL-NEXT: leal -12(%ebp), %esp ## encoding: [0x8d,0x65,0xf4]
; CHECK32-KNL-NEXT: popl %eax ## encoding: [0x58]
; CHECK32-KNL-NEXT: popl %ecx ## encoding: [0x59]
; CHECK32-KNL-NEXT: popl %edx ## encoding: [0x5a]
+; CHECK32-KNL-NEXT: popl %ebp ## encoding: [0x5d]
; CHECK32-KNL-NEXT: iretl ## encoding: [0xcf]
;
; CHECK32-SKX-LABEL: foo:
; CHECK32-SKX: ## %bb.0:
-; CHECK32-SKX-NEXT: pushl %edx ## encoding: [0x52]
+; CHECK32-SKX-NEXT: pushl %ebp ## encoding: [0x55]
; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-SKX-NEXT: .cfi_offset %ebp, -8
+; CHECK32-SKX-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5]
+; CHECK32-SKX-NEXT: .cfi_def_cfa_register %ebp
+; CHECK32-SKX-NEXT: pushl %edx ## encoding: [0x52]
; CHECK32-SKX-NEXT: pushl %ecx ## encoding: [0x51]
-; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 12
; CHECK32-SKX-NEXT: pushl %eax ## encoding: [0x50]
-; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-SKX-NEXT: andl $-16, %esp ## encoding: [0x83,0xe4,0xf0]
; CHECK32-SKX-NEXT: subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00]
; CHECK32-SKX-NEXT: ## imm = 0x270
; CHECK32-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x7d,0xe8]
; CHECK32-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x75,0xe0]
; CHECK32-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x6d,0xd8]
; CHECK32-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x65,0xd0]
; CHECK32-SKX-NEXT: kmovq %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x5d,0xc8]
; CHECK32-SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x55,0xc0]
; CHECK32-SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x4d,0xb8]
; CHECK32-SKX-NEXT: kmovq %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x45,0xb0]
; CHECK32-SKX-NEXT: vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbd,0x48,0xff,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xb5,0x08,0xff,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xad,0xc8,0xfe,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xa5,0x88,0xfe,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x9d,0x48,0xfe,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x95,0x08,0xfe,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
-; CHECK32-SKX-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
-; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 640
-; CHECK32-SKX-NEXT: .cfi_offset %eax, -16
-; CHECK32-SKX-NEXT: .cfi_offset %ecx, -12
-; CHECK32-SKX-NEXT: .cfi_offset %edx, -8
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x8d,0xc8,0xfd,0xff,0xff]
+; CHECK32-SKX-NEXT: vmovups %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x85,0x88,0xfd,0xff,0xff]
+; CHECK32-SKX-NEXT: .cfi_offset %eax, -20
+; CHECK32-SKX-NEXT: .cfi_offset %ecx, -16
+; CHECK32-SKX-NEXT: .cfi_offset %edx, -12
; CHECK32-SKX-NEXT: .cfi_offset %xmm0, -640
; CHECK32-SKX-NEXT: .cfi_offset %xmm1, -576
; CHECK32-SKX-NEXT: .cfi_offset %xmm2, -512
@@ -662,55 +666,55 @@ define x86_intrcc void @foo(ptr byval(i8) %frame) {
; CHECK32-SKX-NEXT: .cfi_offset %xmm5, -320
; CHECK32-SKX-NEXT: .cfi_offset %xmm6, -256
; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -192
-; CHECK32-SKX-NEXT: .cfi_offset %k0, -80
-; CHECK32-SKX-NEXT: .cfi_offset %k1, -72
-; CHECK32-SKX-NEXT: .cfi_offset %k2, -64
-; CHECK32-SKX-NEXT: .cfi_offset %k3, -56
-; CHECK32-SKX-NEXT: .cfi_offset %k4, -48
-; CHECK32-SKX-NEXT: .cfi_offset %k5, -40
-; CHECK32-SKX-NEXT: .cfi_offset %k6, -32
-; CHECK32-SKX-NEXT: .cfi_offset %k7, -24
+; CHECK32-SKX-NEXT: .cfi_offset %k0, -88
+; CHECK32-SKX-NEXT: .cfi_offset %k1, -80
+; CHECK32-SKX-NEXT: .cfi_offset %k2, -72
+; CHECK32-SKX-NEXT: .cfi_offset %k3, -64
+; CHECK32-SKX-NEXT: .cfi_offset %k4, -56
+; CHECK32-SKX-NEXT: .cfi_offset %k5, -48
+; CHECK32-SKX-NEXT: .cfi_offset %k6, -40
+; CHECK32-SKX-NEXT: .cfi_offset %k7, -32
; CHECK32-SKX-NEXT: cld ## encoding: [0xfc]
; CHECK32-SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK32-SKX-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A]
; CHECK32-SKX-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
-; CHECK32-SKX-NEXT: vmovups (%esp), %zmm0 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm0 ## 64-byte Reload
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x85,0x88,0xfd,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x8d,0xc8,0xfd,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x95,0x08,0xfe,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x9d,0x48,0xfe,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xa5,0x88,0xfe,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xad,0xc8,0xfe,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xb5,0x08,0xff,0xff,0xff]
; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbd,0x48,0xff,0xff,0xff]
; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 8-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x45,0xb0]
; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 8-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x4d,0xb8]
; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 8-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x55,0xc0]
; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 8-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x5d,0xc8]
; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 8-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x65,0xd0]
; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 8-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x6d,0xd8]
; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 8-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x75,0xe0]
; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 8-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00]
-; CHECK32-SKX-NEXT: addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00]
-; CHECK32-SKX-NEXT: ## imm = 0x270
+; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x7d,0xe8]
+; CHECK32-SKX-NEXT: leal -12(%ebp), %esp ## encoding: [0x8d,0x65,0xf4]
; CHECK32-SKX-NEXT: popl %eax ## encoding: [0x58]
; CHECK32-SKX-NEXT: popl %ecx ## encoding: [0x59]
; CHECK32-SKX-NEXT: popl %edx ## encoding: [0x5a]
+; CHECK32-SKX-NEXT: popl %ebp ## encoding: [0x5d]
; CHECK32-SKX-NEXT: iretl ## encoding: [0xcf]
call void @bar()
ret void