[llvm-commits] [Review request][Win64] Improve allocating shadow area

Mon Dec 13 07:07:10 PST 2010

Good midnight, guys!

I tweaked Win64's calling conversion.

  - On leaf functions(do not have win64's callee), redundant shadow
area would not be emitted.
  - [PR8778] Calling parameters would be constructed properly onto
dynamic stack allocation(alloca).

This patch is not testsd widely.
I would like to point out whatever I missed.
Please take a look!

...Takumi
-------------- next part --------------
From 9b0a03ce39694d036353b7d8d4b30fefb86b3b95 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi <geek4civic at gmail.com>
Date: Mon, 13 Dec 2010 17:59:20 +0900
Subject: [PATCH] Target/X86: Allocate Win64's shadow allocation, on stack, when callee requires one.

In leaf functions, shadow area would not be allocated any more.
---
 lib/Target/X86/X86FrameInfo.cpp                    |    5 -----
 lib/Target/X86/X86ISelLowering.cpp                 |    8 ++++++--
 test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll |    4 ++--
 test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll       |    9 ++++-----
 test/CodeGen/X86/win64_params.ll                   |    4 ++--
 test/CodeGen/X86/win64_vararg.ll                   |   10 +++++-----
 6 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/lib/Target/X86/X86FrameInfo.cpp b/lib/Target/X86/X86FrameInfo.cpp
index c47b0fa..3733f6c 100644
--- a/lib/Target/X86/X86FrameInfo.cpp
+++ b/lib/Target/X86/X86FrameInfo.cpp
@@ -325,11 +325,6 @@ void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
     if (HasFP) MinSize += SlotSize;
     StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
     MFI->setStackSize(StackSize);
-  } else if (IsWin64) {
-    // We need to always allocate 32 bytes as register spill area.
-    // FIXME: We might reuse these 32 bytes for leaf functions.
-    StackSize += 32;
-    MFI->setStackSize(StackSize);
   }
 
   // Insert stack pointer adjustment for later moving of return addr.  Only
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 045bb93..fd93462 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1807,8 +1807,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
                                     DebugLoc dl, SelectionDAG &DAG,
                                     const CCValAssign &VA,
                                     ISD::ArgFlagsTy Flags) const {
-  const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
-  unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
+  unsigned LocMemOffset = VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   if (Flags.isByVal())
@@ -1892,6 +1891,11 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  ArgLocs, *DAG.getContext());
+
+  // Win64 CC requires at least 4 x 64bit of shadow allocation.
+  if (Subtarget->isTargetWin64())
+    CCInfo.AllocateStack(32, 16);
+
   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
 
   // Get a count of how many bytes are to be pushed on the stack.
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index c598228..0413044 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
-target triple = "x86_64-pc-mingw64"
+; RUN: llc -mtriple=x86_64-pc-mingw64 < %s | FileCheck %s
+; CHECK: subq $8, %rsp
 
 define x86_fp80 @a(i64 %x) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 810a6f4..9833d96 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -o %t1
-; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
-; RUN: grep "movaps	\\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps	\\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
-target triple = "x86_64-pc-mingw64"
+; RUN: llc -mtriple=x86_64-pc-mingw64 < %s | FileCheck %s
+; CHECK: subq    $40, %rsp
+; CHECK: movaps  %xmm8, (%rsp)
+; CHECK: movaps  %xmm7, 16(%rsp)
 
 define i32 @a() nounwind {
 entry:
diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll
index 0b67368..f9d4bf9 100644
--- a/test/CodeGen/X86/win64_params.ll
+++ b/test/CodeGen/X86/win64_params.ll
@@ -4,8 +4,8 @@
 ; on the stack.
 define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
 entry:
-; CHECK: movl    80(%rsp), %eax
-; CHECK: addl    72(%rsp), %eax
+; CHECK: movl    48(%rsp), %eax
+; CHECK: addl    40(%rsp), %eax
   %add = add nsw i32 %p6, %p5
   ret i32 %add
 }
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index 072f36a..aa7d786 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -5,11 +5,11 @@
 ; calculated.
 define void @average_va(i32 %count, ...) nounwind {
 entry:
-; CHECK: subq	$40, %rsp
-; CHECK: movq	%r9, 72(%rsp)
-; CHECK: movq	%r8, 64(%rsp)
-; CHECK: movq	%rdx, 56(%rsp)
-; CHECK: leaq	56(%rsp), %rax
+; CHECK: subq	$8, %rsp
+; CHECK: movq	%r9, 40(%rsp)
+; CHECK: movq	%r8, 32(%rsp)
+; CHECK: movq	%rdx, 24(%rsp)
+; CHECK: leaq	24(%rsp), %rax
 
   %ap = alloca i8*, align 8                       ; <i8**> [#uses=1]
   %ap1 = bitcast i8** %ap to i8*                  ; <i8*> [#uses=1]
-- 
1.7.1.GIT