[PATCH] D65923: [X86] Fix stack probe issue on windows32.
LuoYuanke via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 19:54:15 PDT 2019
LuoYuanke created this revision.
LuoYuanke added reviewers: craig.topper, wxiao3, annita.zhang.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.
On Windows, if the frame size exceeds 4096 bytes, the compiler needs to generate a call to _alloca_probe. The X86CallFrameOptimization pass changes the reserved stack size, which causes the stack probe call not to be inserted. This patch fixes the issue by checking the call frame size: if the size exceeds 4096 bytes, X86CallFrameOptimization is skipped.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D65923
Files:
llvm/lib/Target/X86/X86CallFrameOptimization.cpp
llvm/test/CodeGen/X86/nomovtopush.ll
Index: llvm/test/CodeGen/X86/nomovtopush.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/nomovtopush.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc"
+
+%struct._param_str = type { i32, i32, [4096 x i32], i32 }
+
+@g_d = common dso_local local_unnamed_addr global i32 0, align 4
+@g_c = common dso_local local_unnamed_addr global i32 0, align 4
+@g_b = common dso_local local_unnamed_addr global i32 0, align 4
+@g_a = common dso_local local_unnamed_addr global i32 0, align 4
+@g_param = common dso_local global %struct._param_str zeroinitializer, align 4
+
+; Function Attrs: nounwind
+define dso_local i32 @test() local_unnamed_addr {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl $16396, %eax # imm = 0x400C
+; CHECK-NEXT: calll __chkstk
+; CHECK-NEXT: movl _g_d, %eax
+; CHECK-NEXT: movl _g_c, %ecx
+; CHECK-NEXT: movl _g_b, %edx
+; CHECK-NEXT: movl _g_a, %esi
+; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %esi, (%esp)
+; CHECK-NEXT: calll _bar
+; CHECK-NEXT: movl $4099, %ecx # imm = 0x1003
+; CHECK-NEXT: movl %esp, %edi
+; CHECK-NEXT: movl $_g_param, %esi
+; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
+; CHECK-NEXT: calll _foo
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: addl $16396, %esp # imm = 0x400C
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: retl
+entry:
+ %0 = load i32, i32* @g_d, align 4, !tbaa !3
+ %1 = load i32, i32* @g_c, align 4, !tbaa !3
+ %2 = load i32, i32* @g_b, align 4, !tbaa !3
+ %3 = load i32, i32* @g_a, align 4, !tbaa !3
+ %call = tail call i32 @bar(i32 %3, i32 %2, i32 %1, i32 %0) #2
+ tail call void @foo(%struct._param_str* byval nonnull align 4 @g_param) #2
+ ret i32 0
+}
+
+declare dso_local i32 @bar(i32, i32, i32, i32) local_unnamed_addr
+
+declare dso_local void @foo(%struct._param_str* byval align 4) local_unnamed_addr
+
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
Index: llvm/lib/Target/X86/X86CallFrameOptimization.cpp
===================================================================
--- llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -244,9 +244,23 @@
ContextVector CallSeqVector;
+ bool UseStackProbe =
+ !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
+ unsigned StackProbeSize = 4096;
+ const Function &Fn = MF.getFunction();
+ if (Fn.hasFnAttribute("stack-probe-size"))
+ Fn.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
for (auto &MBB : MF)
for (auto &MI : MBB)
if (MI.getOpcode() == FrameSetupOpcode) {
+ // If the frame size is at least the stack probe size, the compiler
+ // needs to generate a stack probe call. This pass changes the
+ // reserved stack size, which prevents the stack probe call from
+ // being inserted, so bypass the pass in this scenario.
+ if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
+ return false;
CallContext Context;
collectCallInfo(MF, MBB, MI, Context);
CallSeqVector.push_back(Context);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D65923.214056.patch
Type: text/x-patch
Size: 3658 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190808/da8d91e0/attachment.bin>
More information about the llvm-commits
mailing list