[llvm] c8ef3d5 - Fix stack-clash probing for large static alloca
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 3 00:22:26 PDT 2020
Author: serge-sans-paille
Date: 2020-07-03T09:22:03+02:00
New Revision: c8ef3d5a2f19d5ff6b907d2ca877f96b67d979db
URL: https://github.com/llvm/llvm-project/commit/c8ef3d5a2f19d5ff6b907d2ca877f96b67d979db
DIFF: https://github.com/llvm/llvm-project/commit/c8ef3d5a2f19d5ff6b907d2ca877f96b67d979db.diff
LOG: Fix stack-clash probing for large static alloca
Differential Revision: https://reviews.llvm.org/D82867
Added:
Modified:
llvm/lib/Target/X86/X86FrameLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
llvm/test/CodeGen/X86/stack-clash-large.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index b360aa9d905c..c1bd80c41f13 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -636,16 +636,16 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MF.insert(MBBIter, testMBB);
MF.insert(MBBIter, tailMBB);
- Register FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D;
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackPtr)
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
// save loop bound
{
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
- BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackPtr)
- .addReg(FinalStackPtr)
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
+ .addReg(FinalStackProbed)
.addImm(Offset / StackProbeSize * StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -669,13 +669,13 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
// cmp with stack pointer bound
BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
.addReg(StackPtr)
- .addReg(FinalStackPtr)
+ .addReg(FinalStackProbed)
.setMIFlag(MachineInstr::FrameSetup);
// jump
BuildMI(testMBB, DL, TII.get(X86::JCC_1))
.addMBB(testMBB)
- .addImm(X86::COND_L)
+ .addImm(X86::COND_NE)
.setMIFlag(MachineInstr::FrameSetup);
testMBB->addSuccessor(testMBB);
testMBB->addSuccessor(tailMBB);
@@ -686,10 +686,12 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MBB.addSuccessor(testMBB);
// handle tail
- if (Offset % StackProbeSize) {
- BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(TargetOpcode::COPY),
- StackPtr)
- .addReg(FinalStackPtr)
+ unsigned TailOffset = Offset % StackProbeSize;
+ if (TailOffset) {
+ const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
+ BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(TailOffset)
.setMIFlag(MachineInstr::FrameSetup);
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 914f06169577..e214e9b9a202 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31800,8 +31800,8 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
BuildMI(testMBB, DL,
TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
- .addReg(physSPReg)
- .addReg(FinalStackPtr);
+ .addReg(FinalStackPtr)
+ .addReg(physSPReg);
BuildMI(testMBB, DL, TII->get(X86::JCC_1))
.addMBB(tailMBB)
diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
index 928b7a9cba02..bc4678564083 100644
--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
@@ -23,12 +23,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
; CHECK-X86-64-NEXT: leaq 15(,%rcx,4), %rcx
; CHECK-X86-64-NEXT: andq $-16, %rcx
; CHECK-X86-64-NEXT: subq %rcx, %rax
-; CHECK-X86-64-NEXT: cmpq %rax, %rsp
+; CHECK-X86-64-NEXT: cmpq %rsp, %rax
; CHECK-X86-64-NEXT: jl .LBB0_3
; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-X86-64-NEXT: movq $0, (%rsp)
; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-X86-64-NEXT: cmpq %rax, %rsp
+; CHECK-X86-64-NEXT: cmpq %rsp, %rax
; CHECK-X86-64-NEXT: jge .LBB0_2
; CHECK-X86-64-NEXT: .LBB0_3:
; CHECK-X86-64-NEXT: movq %rax, %rsp
@@ -53,12 +53,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
; CHECK-X86-32-NEXT: leal 15(,%ecx,4), %ecx
; CHECK-X86-32-NEXT: andl $-16, %ecx
; CHECK-X86-32-NEXT: subl %ecx, %eax
-; CHECK-X86-32-NEXT: cmpl %eax, %esp
+; CHECK-X86-32-NEXT: cmpl %esp, %eax
; CHECK-X86-32-NEXT: jl .LBB0_3
; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-X86-32-NEXT: movl $0, (%esp)
; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
-; CHECK-X86-32-NEXT: cmpl %eax, %esp
+; CHECK-X86-32-NEXT: cmpl %esp, %eax
; CHECK-X86-32-NEXT: jge .LBB0_2
; CHECK-X86-32-NEXT: .LBB0_3:
; CHECK-X86-32-NEXT: movl %eax, %esp
diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll
index ccc52f213e37..dd53cd8f6964 100644
--- a/llvm/test/CodeGen/X86/stack-clash-large.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-large.ll
@@ -22,9 +22,9 @@ attributes #0 = {"probe-stack"="inline-asm"}
; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-X86-64-NEXT: movq $0, (%rsp)
; CHECK-X86-64-NEXT: cmpq %r11, %rsp
-; CHECK-X86-64-NEXT: jl .LBB0_1
+; CHECK-X86-64-NEXT: jne .LBB0_1
; CHECK-X86-64-NEXT:# %bb.2:
-; CHECK-X86-64-NEXT: movq %r11, %rsp
+; CHECK-X86-64-NEXT: subq $2248, %rsp
; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888
; CHECK-X86-64-NEXT: movl $1, 264(%rsp)
; CHECK-X86-64-NEXT: movl $1, 28664(%rsp)
@@ -41,9 +41,9 @@ attributes #0 = {"probe-stack"="inline-asm"}
; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
; CHECK-X86-32-NEXT: movl $0, (%esp)
; CHECK-X86-32-NEXT: cmpl %r11d, %esp
-; CHECK-X86-32-NEXT: jl .LBB0_1
+; CHECK-X86-32-NEXT: jne .LBB0_1
; CHECK-X86-32-NEXT:# %bb.2:
-; CHECK-X86-32-NEXT: movl %r11d, %esp
+; CHECK-X86-32-NEXT: subl $2380, %esp
; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016
; CHECK-X86-32-NEXT: movl $1, 392(%esp)
; CHECK-X86-32-NEXT: movl $1, 28792(%esp)
More information about the llvm-commits mailing list