[llvm] r367088 - [WinEH] Allocate space in funclets stack to save XMM CSRs
Pengfei Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 26 00:33:15 PDT 2019
Author: pengfei
Date: Fri Jul 26 00:33:15 2019
New Revision: 367088
URL: http://llvm.org/viewvc/llvm-project?rev=367088&view=rev
Log:
[WinEH] Allocate space in funclets stack to save XMM CSRs
Summary:
This is an alternate approach to D57970.
Currently funclets reuse the same stack slots that are used in the
parent function for saving callee-saved xmm registers. If the parent
function modifies a callee-saved xmm register before an exception is
thrown, the catch handler will overwrite the original saved value.
This patch allocates space in funclets stack for saving callee-saved xmm
registers and uses RSP instead of RBP to access memory.
Reviewers: andrew.w.kaylor, LuoYuanke, annita.zhang, craig.topper,
RKSimon
Subscribers: rnk, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63396
Signed-off-by: pengfei <pengfei.wang at intel.com>
Added:
llvm/trunk/test/CodeGen/X86/win64-funclet-savexmm.ll
Modified:
llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
llvm/trunk/lib/Target/X86/X86FrameLowering.h
llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll
llvm/trunk/test/CodeGen/X86/catchpad-realign-savexmm.ll
llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll
Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=367088&r1=367087&r2=367088&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Fri Jul 26 00:33:15 2019
@@ -935,7 +935,10 @@ bool X86FrameLowering::has128ByteRedZone
; calls @llvm.eh.unwind.init
[if needs FP]
[for all callee-saved XMM registers]
- movaps %<xmm reg>, -MMM(%rbp)
+ [if funclet]
+ movaps %<xmm reg>, -MMM(%rsp)
+ [else]
+ movaps %<xmm reg>, -MMM(%rbp)
[for all callee-saved XMM registers]
.seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
; i.e. the offset relative to (%rbp - SEHFrameOffset)
@@ -955,7 +958,10 @@ bool X86FrameLowering::has128ByteRedZone
; Emit CFI info
[if needs FP]
[for all callee-saved registers]
- .cfi_offset %<reg>, (offset from %rbp)
+ [if funclet]
+ movaps -MMM(%rsp), %<xmm reg>
+ [else]
+ .cfi_offset %<reg>, (offset from %rbp)
[else]
.cfi_def_cfa_offset (offset from RETADDR)
[for all callee-saved registers]
@@ -1177,11 +1183,16 @@ void X86FrameLowering::emitPrologue(Mach
MFI.setOffsetAdjustment(-StackSize);
}
- // For EH funclets, only allocate enough space for outgoing calls. Save the
- // NumBytes value that we would've used for the parent frame.
+ // For EH funclets, only allocate enough space for outgoing calls and callee
+ // saved XMM registers on Windows 64 bits. Save the NumBytes value that we
+ // would've used for the parent frame.
+ int XMMFrameSlotOrigin;
unsigned ParentFrameNumBytes = NumBytes;
- if (IsFunclet)
+ if (IsFunclet) {
NumBytes = getWinEHFuncletFrameSize(MF);
+ if (IsWin64Prologue)
+ NumBytes += X86FI->getCalleeSavedXMMFrameInfo(XMMFrameSlotOrigin);
+ }
// Skip the callee-saved push instructions.
bool PushedRegs = false;
@@ -1389,19 +1400,33 @@ void X86FrameLowering::emitPrologue(Mach
}
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
- const MachineInstr &FrameInstr = *MBBI;
+ auto FrameInstr = MBBI;
++MBBI;
if (NeedsWinCFI) {
int FI;
- if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
+ if (unsigned Reg = TII.isStoreToStackSlot(*FrameInstr, FI)) {
if (X86::FR64RegClass.contains(Reg)) {
- unsigned IgnoredFrameReg;
- int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
- Offset += SEHFrameOffset;
-
+ int Offset = 0;
HasWinCFI = true;
- assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
+ if (IsFunclet) {
+ assert(IsWin64Prologue && "Only valid on Windows 64bit");
+ unsigned Size = TRI->getSpillSize(X86::VR128RegClass);
+ unsigned Align = TRI->getSpillAlignment(X86::VR128RegClass);
+ Offset = (FI - XMMFrameSlotOrigin - 1) * Size +
+ alignDown(NumBytes, Align);
+ addRegOffset(BuildMI(MBB, MBBI, DL,
+ TII.get(getXMMAlignedLoadStoreOp(false))),
+ StackPtr, true, Offset)
+ .addReg(Reg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MBB.erase(FrameInstr);
+ } else {
+ assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
+ unsigned IgnoredFrameReg;
+ Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) +
+ SEHFrameOffset;
+ }
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
.addImm(Reg)
.addImm(Offset)
@@ -1621,6 +1646,9 @@ void X86FrameLowering::emitEpilogue(Mach
if (IsFunclet) {
assert(HasFP && "EH funclets without FP not yet implemented");
NumBytes = getWinEHFuncletFrameSize(MF);
+ int Ignore;
+ if (IsWin64Prologue)
+ NumBytes += X86FI->getCalleeSavedXMMFrameInfo(Ignore);
} else if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
@@ -1948,6 +1976,8 @@ bool X86FrameLowering::assignCalleeSaved
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CalleeSavedFrameSize = 0;
+ unsigned CalleeSavedXMMFrameSize = 0;
+ int CalleeSavedXMMSlotOrigin = 0;
int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -2011,9 +2041,44 @@ bool X86FrameLowering::assignCalleeSaved
MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
// Assign slots for XMMs.
+ for (unsigned i = CSI.size(), Size = 0; i != 0; --i) {
+ unsigned Reg = CSI[i - 1].getReg();
+ // According to Microsoft "x64 software conventions", only XMM registers
+ // are nonvolatile except the GPR.
+ if (!X86::VR128RegClass.contains(Reg))
+ continue;
+ // Since all registers have the same size, we just initialize once.
+ if (Size == 0) {
+ unsigned Align = TRI->getSpillAlignment(X86::VR128RegClass);
+ // ensure alignment
+ int Remainder = SpillSlotOffset % Align;
+ if (Remainder < 0)
+ SpillSlotOffset -= Align + Remainder;
+ else
+ SpillSlotOffset -= Remainder;
+ MFI.ensureMaxAlignment(Align);
+ Size = TRI->getSpillSize(X86::VR128RegClass);
+ }
+ // spill into slot
+ SpillSlotOffset -= Size;
+ int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
+ CSI[i - 1].setFrameIdx(SlotIndex);
+ // Since we allocate XMM slot consecutively in stack, we just need to
+ // record the first one for the funclet use.
+ if (CalleeSavedXMMFrameSize == 0) {
+ CalleeSavedXMMSlotOrigin = SlotIndex;
+ }
+ CalleeSavedXMMFrameSize += Size;
+ }
+
+ X86FI->setCalleeSavedXMMFrameInfo(CalleeSavedXMMFrameSize,
+ CalleeSavedXMMSlotOrigin);
+
+ // Assign slots for others.
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i - 1].getReg();
- if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg) ||
+ X86::VR128RegClass.contains(Reg))
continue;
// If this is k-register make sure we lookup via the largest legal type.
@@ -2025,7 +2090,11 @@ bool X86FrameLowering::assignCalleeSaved
unsigned Size = TRI->getSpillSize(*RC);
unsigned Align = TRI->getSpillAlignment(*RC);
// ensure alignment
- SpillSlotOffset -= std::abs(SpillSlotOffset) % Align;
+ int Remainder = SpillSlotOffset % Align;
+ if (Remainder < 0)
+ SpillSlotOffset -= Align + Remainder;
+ else
+ SpillSlotOffset -= Remainder;
// spill into slot
SpillSlotOffset -= Size;
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
@@ -2164,19 +2233,32 @@ bool X86FrameLowering::restoreCalleeSave
DebugLoc DL = MBB.findDebugLoc(MI);
// Reload XMMs from stack frame.
+ MachineFunction &MF = *MBB.getParent();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int XMMFrameSlotOrigin;
+ int SEHFrameOffset = X86FI->getCalleeSavedXMMFrameInfo(XMMFrameSlotOrigin) +
+ MF.getFrameInfo().getMaxCallFrameSize();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- if (X86::GR64RegClass.contains(Reg) ||
- X86::GR32RegClass.contains(Reg))
- continue;
+ if (MBB.isEHFuncletEntry() && STI.is64Bit()) {
+ if (X86::VR128RegClass.contains(Reg)) {
+ int Offset = (CSI[i].getFrameIdx() - XMMFrameSlotOrigin - 1) * 16;
+ addRegOffset(BuildMI(MBB, MI, DL,
+ TII.get(getXMMAlignedLoadStoreOp(true)), Reg),
+ X86::RSP, true, SEHFrameOffset + Offset);
+ }
+ } else {
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ continue;
- // If this is k-register make sure we lookup via the largest legal type.
- MVT VT = MVT::Other;
- if (X86::VK16RegClass.contains(Reg))
- VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+ // If this is k-register make sure we lookup via the largest legal type.
+ MVT VT = MVT::Other;
+ if (X86::VK16RegClass.contains(Reg))
+ VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ }
}
// POP GPRs.
@@ -3185,3 +3267,8 @@ void X86FrameLowering::processFunctionBe
UnwindHelpFI)
.addImm(-2);
}
+
+unsigned X86FrameLowering::getXMMAlignedLoadStoreOp(const bool IsLoad) const {
+ return IsLoad ? (STI.hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm)
+ : (STI.hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr);
+}
Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.h?rev=367088&r1=367087&r2=367088&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.h Fri Jul 26 00:33:15 2019
@@ -217,6 +217,10 @@ private:
void emitCatchRetReturnValue(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineInstr *CatchRet) const;
+
+ /// Select the best opcode for the subtarget when funclet XMM CSRs
+ /// save/restore.
+ unsigned getXMMAlignedLoadStoreOp(const bool IsLoad) const;
};
} // End llvm namespace
Modified: llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h?rev=367088&r1=367087&r2=367088&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h Fri Jul 26 00:33:15 2019
@@ -40,6 +40,14 @@ class X86MachineFunctionInfo : public Ma
/// stack frame in bytes.
unsigned CalleeSavedFrameSize = 0;
+ /// CalleeSavedXMMFrameSize - Size of the callee-saved XMM register portion
+ /// of the stack frame in bytes.
+ unsigned CalleeSavedXMMFrameSize = 0;
+
+ /// CalleeSavedXMMFrameOrigin - Origin slot of the callee-saved XMM register
+ /// portion of the stack frame.
+ int CalleeSavedXMMFrameOrigin = 0;
+
/// BytesToPopOnReturn - Number of bytes function pops on return (in addition
/// to the space used by the return address).
/// Used on windows platform for stdcall & fastcall name decoration
@@ -123,6 +131,11 @@ public:
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+ unsigned getCalleeSavedXMMFrameInfo(int &origin) const
+ { origin = CalleeSavedXMMFrameOrigin; return CalleeSavedXMMFrameSize; }
+ void setCalleeSavedXMMFrameInfo(unsigned size, int origin)
+ { CalleeSavedXMMFrameSize = size; CalleeSavedXMMFrameOrigin = origin; }
+
unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
Modified: llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll?rev=367088&r1=367087&r2=367088&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intel-ocl.ll Fri Jul 26 00:33:15 2019
@@ -184,14 +184,14 @@ define intel_ocl_bicc <16 x float> @test
; WIN64-KNL-LABEL: test_prolog_epilog:
; WIN64-KNL: # %bb.0:
; WIN64-KNL-NEXT: pushq %rbp
-; WIN64-KNL-NEXT: subq $1328, %rsp # imm = 0x530
+; WIN64-KNL-NEXT: subq $1264, %rsp # imm = 0x4F0
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
-; WIN64-KNL-NEXT: kmovw %k7, 1198(%rbp) # 2-byte Spill
-; WIN64-KNL-NEXT: kmovw %k6, 1196(%rbp) # 2-byte Spill
-; WIN64-KNL-NEXT: kmovw %k5, 1194(%rbp) # 2-byte Spill
-; WIN64-KNL-NEXT: kmovw %k4, 1192(%rbp) # 2-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm21, 1104(%rbp) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm20, 992(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT: kmovw %k7, 1134(%rbp) # 2-byte Spill
+; WIN64-KNL-NEXT: kmovw %k6, 1132(%rbp) # 2-byte Spill
+; WIN64-KNL-NEXT: kmovw %k5, 1130(%rbp) # 2-byte Spill
+; WIN64-KNL-NEXT: kmovw %k4, 1128(%rbp) # 2-byte Spill
+; WIN64-KNL-NEXT: vmovaps %zmm21, 1024(%rbp) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovaps %zmm20, 960(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm19, 896(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm18, 832(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm17, 768(%rbp) # 64-byte Spill
@@ -226,26 +226,26 @@ define intel_ocl_bicc <16 x float> @test
; WIN64-KNL-NEXT: vmovaps 768(%rbp), %zmm17 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 832(%rbp), %zmm18 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 896(%rbp), %zmm19 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps 992(%rbp), %zmm20 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps 1104(%rbp), %zmm21 # 64-byte Reload
-; WIN64-KNL-NEXT: kmovw 1192(%rbp), %k4 # 2-byte Reload
-; WIN64-KNL-NEXT: kmovw 1194(%rbp), %k5 # 2-byte Reload
-; WIN64-KNL-NEXT: kmovw 1196(%rbp), %k6 # 2-byte Reload
-; WIN64-KNL-NEXT: kmovw 1198(%rbp), %k7 # 2-byte Reload
-; WIN64-KNL-NEXT: leaq 1200(%rbp), %rsp
+; WIN64-KNL-NEXT: vmovaps 960(%rbp), %zmm20 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovaps 1024(%rbp), %zmm21 # 64-byte Reload
+; WIN64-KNL-NEXT: kmovw 1128(%rbp), %k4 # 2-byte Reload
+; WIN64-KNL-NEXT: kmovw 1130(%rbp), %k5 # 2-byte Reload
+; WIN64-KNL-NEXT: kmovw 1132(%rbp), %k6 # 2-byte Reload
+; WIN64-KNL-NEXT: kmovw 1134(%rbp), %k7 # 2-byte Reload
+; WIN64-KNL-NEXT: leaq 1136(%rbp), %rsp
; WIN64-KNL-NEXT: popq %rbp
; WIN64-KNL-NEXT: retq
;
; WIN64-SKX-LABEL: test_prolog_epilog:
; WIN64-SKX: # %bb.0:
; WIN64-SKX-NEXT: pushq %rbp
-; WIN64-SKX-NEXT: subq $1328, %rsp # imm = 0x530
+; WIN64-SKX-NEXT: subq $1264, %rsp # imm = 0x4F0
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
-; WIN64-SKX-NEXT: kmovq %k7, 1192(%rbp) # 8-byte Spill
-; WIN64-SKX-NEXT: kmovq %k6, 1184(%rbp) # 8-byte Spill
-; WIN64-SKX-NEXT: kmovq %k5, 1176(%rbp) # 8-byte Spill
-; WIN64-SKX-NEXT: kmovq %k4, 1168(%rbp) # 8-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm21, 1056(%rbp) # 64-byte Spill
+; WIN64-SKX-NEXT: kmovq %k7, 1128(%rbp) # 8-byte Spill
+; WIN64-SKX-NEXT: kmovq %k6, 1120(%rbp) # 8-byte Spill
+; WIN64-SKX-NEXT: kmovq %k5, 1112(%rbp) # 8-byte Spill
+; WIN64-SKX-NEXT: kmovq %k4, 1104(%rbp) # 8-byte Spill
+; WIN64-SKX-NEXT: vmovaps %zmm21, 1024(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm20, 960(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm19, 896(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm18, 832(%rbp) # 64-byte Spill
@@ -282,12 +282,12 @@ define intel_ocl_bicc <16 x float> @test
; WIN64-SKX-NEXT: vmovaps 832(%rbp), %zmm18 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 896(%rbp), %zmm19 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 960(%rbp), %zmm20 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps 1056(%rbp), %zmm21 # 64-byte Reload
-; WIN64-SKX-NEXT: kmovq 1168(%rbp), %k4 # 8-byte Reload
-; WIN64-SKX-NEXT: kmovq 1176(%rbp), %k5 # 8-byte Reload
-; WIN64-SKX-NEXT: kmovq 1184(%rbp), %k6 # 8-byte Reload
-; WIN64-SKX-NEXT: kmovq 1192(%rbp), %k7 # 8-byte Reload
-; WIN64-SKX-NEXT: leaq 1200(%rbp), %rsp
+; WIN64-SKX-NEXT: vmovaps 1024(%rbp), %zmm21 # 64-byte Reload
+; WIN64-SKX-NEXT: kmovq 1104(%rbp), %k4 # 8-byte Reload
+; WIN64-SKX-NEXT: kmovq 1112(%rbp), %k5 # 8-byte Reload
+; WIN64-SKX-NEXT: kmovq 1120(%rbp), %k6 # 8-byte Reload
+; WIN64-SKX-NEXT: kmovq 1128(%rbp), %k7 # 8-byte Reload
+; WIN64-SKX-NEXT: leaq 1136(%rbp), %rsp
; WIN64-SKX-NEXT: popq %rbp
; WIN64-SKX-NEXT: retq
;
@@ -346,7 +346,7 @@ define intel_ocl_bicc <16 x float> @test
; X64-SKX: ## %bb.0:
; X64-SKX-NEXT: pushq %rsi
; X64-SKX-NEXT: pushq %rdi
-; X64-SKX-NEXT: subq $1192, %rsp ## imm = 0x4A8
+; X64-SKX-NEXT: subq $1064, %rsp ## imm = 0x428
; X64-SKX-NEXT: kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
@@ -388,7 +388,7 @@ define intel_ocl_bicc <16 x float> @test
; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
-; X64-SKX-NEXT: addq $1192, %rsp ## imm = 0x4A8
+; X64-SKX-NEXT: addq $1064, %rsp ## imm = 0x428
; X64-SKX-NEXT: popq %rdi
; X64-SKX-NEXT: popq %rsi
; X64-SKX-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/catchpad-realign-savexmm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/catchpad-realign-savexmm.ll?rev=367088&r1=367087&r2=367088&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/catchpad-realign-savexmm.ll (original)
+++ llvm/trunk/test/CodeGen/X86/catchpad-realign-savexmm.ll Fri Jul 26 00:33:15 2019
@@ -51,3 +51,18 @@ catch:
; CHECK: popq %rbp
; CHECK: retq
; CHECK: .seh_handlerdata
+; CHECK: # %catch
+; CHECK: movq %rdx, 16(%rsp)
+; CHECK: pushq %rbp
+; CHECK: .seh_pushreg 5
+; CHECK: subq $48, %rsp
+; CHECK: .seh_stackalloc 48
+; CHECK: leaq 64(%rdx), %rbp
+; CHECK: movapd %xmm6, 32(%rsp)
+; CHECK: .seh_savexmm 6, 32
+; CHECK: .seh_endprologue
+; CHECK: movapd 32(%rsp), %xmm6
+; CHECK: leaq .LBB0_1(%rip), %rax
+; CHECK: addq $48, %rsp
+; CHECK: popq %rbp
+; CHECK: retq # CATCHRET
Added: llvm/trunk/test/CodeGen/X86/win64-funclet-savexmm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win64-funclet-savexmm.ll?rev=367088&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win64-funclet-savexmm.ll (added)
+++ llvm/trunk/test/CodeGen/X86/win64-funclet-savexmm.ll Fri Jul 26 00:33:15 2019
@@ -0,0 +1,68 @@
+; RUN: llc -mtriple=x86_64-pc-windows-msvc -mattr=+avx < %s | FileCheck %s
+
+; void foo(void)
+; {
+; __asm("nop" ::: "bx", "cx", "xmm5", "xmm6", "ymm7");
+; try {
+; throw;
+; }
+; catch (int x) {
+; }
+; }
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.ThrowInfo = type { i32, i8*, i8*, i8* }
+
+$"??_R0H at 8" = comdat any
+
+@"??_7type_info@@6B@" = external constant i8*
+@"??_R0H at 8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+declare dso_local i32 @__CxxFrameHandler3(...)
+declare dso_local x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+define dso_local void @"?foo@@YAXXZ"() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %x = alloca i32, align 4
+ call void asm sideeffect "nop", "~{bx},~{cx},~{xmm5},~{xmm6},~{ymm7}"()
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null)
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [%rtti.TypeDescriptor2* @"??_R0H at 8", i32 0, i32* %x]
+ catchret from %1 to label %catchret.dest
+
+catchret.dest: ; preds = %catch
+ br label %try.cont
+
+try.cont: ; preds = %catchret.dest
+ ret void
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+; CHECK: # %catch
+; CHECK: movq %rdx, 16(%rsp)
+; CHECK: pushq %rbp
+; CHECK: .seh_pushreg 5
+; CHECK: pushq %rbx
+; CHECK: .seh_pushreg 3
+; CHECK: subq $72, %rsp
+; CHECK: .seh_stackalloc 72
+; CHECK: leaq 80(%rdx), %rbp
+; CHECK: vmovaps %xmm7, 48(%rsp)
+; CHECK: .seh_savexmm 7, 48
+; CHECK: vmovaps %xmm6, 32(%rsp)
+; CHECK: .seh_savexmm 6, 32
+; CHECK: .seh_endprologue
+; CHECK: vmovaps 32(%rsp), %xmm6
+; CHECK: vmovaps 48(%rsp), %xmm7
+; CHECK: leaq .LBB0_3(%rip), %rax
+; CHECK: addq $72, %rsp
+; CHECK: popq %rbx
+; CHECK: popq %rbp
+; CHECK: retq # CATCHRET
Modified: llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll?rev=367088&r1=367087&r2=367088&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-interrupt_cc.ll Fri Jul 26 00:33:15 2019
@@ -294,7 +294,7 @@ define x86_intrcc void @foo(i8* %frame)
; CHECK64-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
; CHECK64-SKX-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
-; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x7c,0x24,0x1f]
; CHECK64-SKX-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
; CHECK64-SKX-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
@@ -398,7 +398,7 @@ define x86_intrcc void @foo(i8* %frame)
; CHECK64-SKX-NEXT: .cfi_offset %xmm28, -448
; CHECK64-SKX-NEXT: .cfi_offset %xmm29, -384
; CHECK64-SKX-NEXT: .cfi_offset %xmm30, -320
-; CHECK64-SKX-NEXT: .cfi_offset %xmm31, -224
+; CHECK64-SKX-NEXT: .cfi_offset %xmm31, -256
; CHECK64-SKX-NEXT: .cfi_offset %k0, -144
; CHECK64-SKX-NEXT: .cfi_offset %k1, -136
; CHECK64-SKX-NEXT: .cfi_offset %k2, -128
@@ -474,7 +474,7 @@ define x86_intrcc void @foo(i8* %frame)
; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
-; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x7c,0x24,0x1f]
; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
@@ -635,7 +635,7 @@ define x86_intrcc void @foo(i8* %frame)
; CHECK32-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill
; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
; CHECK32-SKX-NEXT: vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
; CHECK32-SKX-NEXT: vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
; CHECK32-SKX-NEXT: vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill
@@ -661,7 +661,7 @@ define x86_intrcc void @foo(i8* %frame)
; CHECK32-SKX-NEXT: .cfi_offset %xmm4, -384
; CHECK32-SKX-NEXT: .cfi_offset %xmm5, -320
; CHECK32-SKX-NEXT: .cfi_offset %xmm6, -256
-; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -160
+; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -192
; CHECK32-SKX-NEXT: .cfi_offset %k0, -80
; CHECK32-SKX-NEXT: .cfi_offset %k1, -72
; CHECK32-SKX-NEXT: .cfi_offset %k2, -64
@@ -689,7 +689,7 @@ define x86_intrcc void @foo(i8* %frame)
; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload
-; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00]
+; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload
; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload
More information about the llvm-commits
mailing list