[llvm] 8eae321 - Improve stack-clash implementation on x86
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 25 05:48:23 PDT 2020
Author: serge-sans-paille
Date: 2020-05-25T14:48:14+02:00
New Revision: 8eae32188bbaa4ac5943f8a98b3b7e4bbba55698
URL: https://github.com/llvm/llvm-project/commit/8eae32188bbaa4ac5943f8a98b3b7e4bbba55698
DIFF: https://github.com/llvm/llvm-project/commit/8eae32188bbaa4ac5943f8a98b3b7e4bbba55698.diff
LOG: Improve stack-clash implementation on x86
- test both 32- and 64-bit versions
- probe the tail in dynamic-alloca
- generate more concise code
Differential Revision: https://reviews.llvm.org/D79482
Added:
Modified:
llvm/lib/Target/X86/X86FrameLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
llvm/test/CodeGen/X86/stack-clash-large.ll
llvm/test/CodeGen/X86/stack-clash-medium.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 062cf7acc58d..f320041b2de6 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -275,9 +275,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
// allocation is split in smaller chunks anyway.
if (EmitInlineStackProbe && !InEpilogue) {
- // stack probing may involve looping, and control flow generations is
- // disallowed at this point. Rely to later processing through
- // `inlineStackProbe`.
+ // Delegate stack probing to the `inlineStackProbe` mechanism to avoid
+ // complications.
MachineInstr *Stub = emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
// Encode the static offset as a metadata attached to the stub.
@@ -645,6 +644,7 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
uint64_t Offset) const {
+ assert(Offset && "null offset");
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();
@@ -662,8 +662,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MF.insert(MBBIter, testMBB);
MF.insert(MBBIter, tailMBB);
- unsigned FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D;
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FinalStackPtr)
+ Register FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackPtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
@@ -693,7 +693,7 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
.setMIFlag(MachineInstr::FrameSetup);
// cmp with stack pointer bound
- BuildMI(testMBB, DL, TII.get(IsLP64 ? X86::CMP64rr : X86::CMP32rr))
+ BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
.addReg(StackPtr)
.addReg(FinalStackPtr)
.setMIFlag(MachineInstr::FrameSetup);
@@ -701,23 +701,22 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
// jump
BuildMI(testMBB, DL, TII.get(X86::JCC_1))
.addMBB(testMBB)
- .addImm(X86::COND_NE)
+ .addImm(X86::COND_L)
.setMIFlag(MachineInstr::FrameSetup);
testMBB->addSuccessor(testMBB);
testMBB->addSuccessor(tailMBB);
testMBB->addLiveIn(FinalStackPtr);
- // allocate a block and touch it
-
+ // BB management
tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
MBB.addSuccessor(testMBB);
+ // handle tail
if (Offset % StackProbeSize) {
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
- BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(Offset % StackProbeSize)
+ BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(TargetOpcode::COPY),
+ StackPtr)
+ .addReg(FinalStackPtr)
.setMIFlag(MachineInstr::FrameSetup);
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eab9f14bec91..5101977a68ed 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31612,14 +31612,26 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
return SinkMBB;
}
+static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
+ if (IsLP64) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
MachineBasicBlock *
X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
- MachineBasicBlock *BB) const {
- MachineFunction *MF = BB->getParent();
+ MachineBasicBlock *MBB) const {
+ MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
DebugLoc DL = MI.getDebugLoc();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
const unsigned ProbeSize = getStackProbeSize(*MF);
@@ -31628,31 +31640,35 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator MBBIter = ++BB->getIterator();
+ MachineFunction::iterator MBBIter = ++MBB->getIterator();
MF->insert(MBBIter, testMBB);
MF->insert(MBBIter, blockMBB);
MF->insert(MBBIter, tailMBB);
- unsigned sizeVReg = MI.getOperand(1).getReg();
+ Register sizeVReg = MI.getOperand(1).getReg();
- const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg);
+ Register physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;
- unsigned tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass);
- unsigned tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass);
+ Register TmpStackPtr = MRI.createVirtualRegister(
+ TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
+ Register FinalStackPtr = MRI.createVirtualRegister(
+ TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
- unsigned physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;
+ BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr)
+ .addReg(physSPReg);
+ {
+ const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr;
+ BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr)
+ .addReg(TmpStackPtr)
+ .addReg(sizeVReg);
+ }
// test rsp size
- BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg)
- .addReg(sizeVReg)
- .addMBB(BB)
- .addReg(tmpSizeVReg2)
- .addMBB(blockMBB);
BuildMI(testMBB, DL,
- TII->get(TFI.Uses64BitFramePtr ? X86::CMP64ri32 : X86::CMP32ri))
- .addReg(tmpSizeVReg)
- .addImm(ProbeSize);
+ TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(physSPReg)
+ .addReg(FinalStackPtr);
BuildMI(testMBB, DL, TII->get(X86::JCC_1))
.addMBB(tailMBB)
@@ -31663,14 +31679,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
// allocate a block and touch it
BuildMI(blockMBB, DL,
- TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri),
- tmpSizeVReg2)
- .addReg(tmpSizeVReg)
- .addImm(ProbeSize);
-
- BuildMI(blockMBB, DL,
- TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri),
- physSPReg)
+ TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg)
.addReg(physSPReg)
.addImm(ProbeSize);
@@ -31682,19 +31691,14 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
blockMBB->addSuccessor(testMBB);
- // allocate the tail and continue
- BuildMI(tailMBB, DL,
- TII->get(TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr),
- physSPReg)
- .addReg(physSPReg)
- .addReg(tmpSizeVReg);
+ // Replace original instruction by the expected stack ptr
BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
- .addReg(physSPReg);
+ .addReg(FinalStackPtr);
- tailMBB->splice(tailMBB->end(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- tailMBB->transferSuccessorsAndUpdatePHIs(BB);
- BB->addSuccessor(testMBB);
+ tailMBB->splice(tailMBB->end(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ tailMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ MBB->addSuccessor(testMBB);
// Delete the original pseudo instruction.
MI.eraseFromParent();
diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
index c0a199e16a94..140da42fc6fb 100644
--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
@@ -1,39 +1,7 @@
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s
+; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s
define i32 @foo(i32 %n) local_unnamed_addr #0 {
-
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset %rbp, -16
-; CHECK-NEXT: movq %rsp, %rbp
-; CHECK-NEXT: .cfi_def_cfa_register %rbp
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: leaq 15(,%rax,4), %rax
-; CHECK-NEXT: andq $-16, %rax
-; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
-; CHECK-NEXT: jl .LBB0_3
-; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subq $4096, %rax # imm = 0x1000
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
-; CHECK-NEXT: jge .LBB0_2
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: subq %rax, %rsp
-; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: movl $1, 4792(%rax)
-; CHECK-NEXT: movl (%rax), %eax
-; CHECK-NEXT: movq %rbp, %rsp
-; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-; CHECK-NEXT: retq
-
%a = alloca i32, i32 %n, align 16
%b = getelementptr inbounds i32, i32* %a, i64 1198
store volatile i32 1, i32* %b
@@ -42,3 +10,62 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 {
}
attributes #0 = {"probe-stack"="inline-asm"}
+
+; CHECK-X86-64-LABEL: foo:
+; CHECK-X86-64: # %bb.0:
+; CHECK-X86-64-NEXT: pushq %rbp
+; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 16
+; CHECK-X86-64-NEXT: .cfi_offset %rbp, -16
+; CHECK-X86-64-NEXT: movq %rsp, %rbp
+; CHECK-X86-64-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-X86-64-NEXT: movq %rsp, %rax
+; CHECK-X86-64-NEXT: movl %edi, %ecx
+; CHECK-X86-64-NEXT: leaq 15(,%rcx,4), %rcx
+; CHECK-X86-64-NEXT: andq $-16, %rcx
+; CHECK-X86-64-NEXT: subq %rcx, %rax
+; CHECK-X86-64-NEXT: cmpq %rax, %rsp
+; CHECK-X86-64-NEXT: jl .LBB0_3
+; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
+; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-X86-64-NEXT: movq $0, (%rsp)
+; CHECK-X86-64-NEXT: cmpq %rax, %rsp
+; CHECK-X86-64-NEXT: jge .LBB0_2
+; CHECK-X86-64-NEXT: .LBB0_3:
+; CHECK-X86-64-NEXT: movq %rax, %rsp
+; CHECK-X86-64-NEXT: movl $1, 4792(%rax)
+; CHECK-X86-64-NEXT: movl (%rax), %eax
+; CHECK-X86-64-NEXT: movq %rbp, %rsp
+; CHECK-X86-64-NEXT: popq %rbp
+; CHECK-X86-64-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-X86-64-NEXT: retq
+
+
+; CHECK-X86-32-LABEL: foo:
+; CHECK-X86-32: # %bb.0:
+; CHECK-X86-32-NEXT: pushl %ebp
+; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-X86-32-NEXT: .cfi_offset %ebp, -8
+; CHECK-X86-32-NEXT: movl %esp, %ebp
+; CHECK-X86-32-NEXT: .cfi_def_cfa_register %ebp
+; CHECK-X86-32-NEXT: subl $8, %esp
+; CHECK-X86-32-NEXT: movl 8(%ebp), %ecx
+; CHECK-X86-32-NEXT: movl %esp, %eax
+; CHECK-X86-32-NEXT: leal 15(,%ecx,4), %ecx
+; CHECK-X86-32-NEXT: andl $-16, %ecx
+; CHECK-X86-32-NEXT: subl %ecx, %eax
+; CHECK-X86-32-NEXT: cmpl %eax, %esp
+; CHECK-X86-32-NEXT: jl .LBB0_3
+; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
+; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
+; CHECK-X86-32-NEXT: movl $0, (%esp)
+; CHECK-X86-32-NEXT: cmpl %eax, %esp
+; CHECK-X86-32-NEXT: jge .LBB0_2
+; CHECK-X86-32-NEXT: .LBB0_3:
+; CHECK-X86-32-NEXT: movl %eax, %esp
+; CHECK-X86-32-NEXT: movl $1, 4792(%eax)
+; CHECK-X86-32-NEXT: movl (%eax), %eax
+; CHECK-X86-32-NEXT: movl %ebp, %esp
+; CHECK-X86-32-NEXT: popl %ebp
+; CHECK-X86-32-NEXT: .cfi_def_cfa %esp, 4
+; CHECK-X86-32-NEXT: retl
+
diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll
index f9a5fdc17b84..ccc52f213e37 100644
--- a/llvm/test/CodeGen/X86/stack-clash-large.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-large.ll
@@ -1,31 +1,8 @@
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s
+; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s
define i32 @foo() local_unnamed_addr #0 {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rsp, %r11
-; CHECK-NEXT: subq $69632, %r11 # imm = 0x11000
-; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: cmpq %r11, %rsp
-; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT:# %bb.2:
-; CHECK-NEXT: subq $2248, %rsp # imm = 0x8C8
-; CHECK-NEXT: .cfi_def_cfa_offset 71888
-; CHECK-NEXT: movl $1, 264(%rsp)
-; CHECK-NEXT: movl $1, 28664(%rsp)
-; CHECK-NEXT: movl -128(%rsp), %eax
-; CHECK-NEXT: addq $71880, %rsp # imm = 0x118C8
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-
-
%a = alloca i32, i64 18000, align 16
%b0 = getelementptr inbounds i32, i32* %a, i64 98
%b1 = getelementptr inbounds i32, i32* %a, i64 7198
@@ -36,3 +13,41 @@ define i32 @foo() local_unnamed_addr #0 {
}
attributes #0 = {"probe-stack"="inline-asm"}
+
+; CHECK-X86-64-LABEL: foo:
+; CHECK-X86-64: # %bb.0:
+; CHECK-X86-64-NEXT: movq %rsp, %r11
+; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000
+; CHECK-X86-64-NEXT: .LBB0_1:
+; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-X86-64-NEXT: movq $0, (%rsp)
+; CHECK-X86-64-NEXT: cmpq %r11, %rsp
+; CHECK-X86-64-NEXT: jl .LBB0_1
+; CHECK-X86-64-NEXT:# %bb.2:
+; CHECK-X86-64-NEXT: movq %r11, %rsp
+; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888
+; CHECK-X86-64-NEXT: movl $1, 264(%rsp)
+; CHECK-X86-64-NEXT: movl $1, 28664(%rsp)
+; CHECK-X86-64-NEXT: movl -128(%rsp), %eax
+; CHECK-X86-64-NEXT: addq $71880, %rsp # imm = 0x118C8
+; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8
+; CHECK-X86-64-NEXT: retq
+
+; CHECK-X86-32-LABEL: foo:
+; CHECK-X86-32: # %bb.0:
+; CHECK-X86-32-NEXT: movl %esp, %r11d
+; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000
+; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
+; CHECK-X86-32-NEXT: movl $0, (%esp)
+; CHECK-X86-32-NEXT: cmpl %r11d, %esp
+; CHECK-X86-32-NEXT: jl .LBB0_1
+; CHECK-X86-32-NEXT:# %bb.2:
+; CHECK-X86-32-NEXT: movl %r11d, %esp
+; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016
+; CHECK-X86-32-NEXT: movl $1, 392(%esp)
+; CHECK-X86-32-NEXT: movl $1, 28792(%esp)
+; CHECK-X86-32-NEXT: movl (%esp), %eax
+; CHECK-X86-32-NEXT: addl $72012, %esp # imm = 0x1194C
+; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-X86-32-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/stack-clash-medium.ll b/llvm/test/CodeGen/X86/stack-clash-medium.ll
index 05af3478cfc0..5a97074025f1 100644
--- a/llvm/test/CodeGen/X86/stack-clash-medium.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-medium.ll
@@ -1,25 +1,7 @@
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s
+; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s
define i32 @foo() local_unnamed_addr #0 {
-
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
-; CHECK-NEXT: .cfi_def_cfa_offset 7888
-; CHECK-NEXT: movl $1, 672(%rsp)
-; CHECK-NEXT: movl -128(%rsp), %eax
-; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-
-
-
%a = alloca i32, i64 2000, align 16
%b = getelementptr inbounds i32, i32* %a, i64 200
store volatile i32 1, i32* %b
@@ -28,3 +10,28 @@ define i32 @foo() local_unnamed_addr #0 {
}
attributes #0 = {"probe-stack"="inline-asm"}
+
+; CHECK-X86-64-LABEL: foo:
+; CHECK-X86-64: # %bb.0:
+; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-X86-64-NEXT: movq $0, (%rsp)
+; CHECK-X86-64-NEXT: subq $3784, %rsp # imm = 0xEC8
+; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888
+; CHECK-X86-64-NEXT: movl $1, 672(%rsp)
+; CHECK-X86-64-NEXT: movl -128(%rsp), %eax
+; CHECK-X86-64-NEXT: addq $7880, %rsp # imm = 0x1EC8
+; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8
+; CHECK-X86-64-NEXT: retq
+
+
+; CHECK-X86-32-LABEL: foo:
+; CHECK-X86-32: # %bb.0:
+; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
+; CHECK-X86-32-NEXT: movl $0, (%esp)
+; CHECK-X86-32-NEXT: subl $3916, %esp # imm = 0xF4C
+; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016
+; CHECK-X86-32-NEXT: movl $1, 800(%esp)
+; CHECK-X86-32-NEXT: movl (%esp), %eax
+; CHECK-X86-32-NEXT: addl $8012, %esp # imm = 0x1F4C
+; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-X86-32-NEXT: retl
More information about the llvm-commits
mailing list