[llvm-commits] Issue with Win64 local area stack offset

Jan Sjodin jan_sjodin at yahoo.com
Tue Jun 22 15:37:37 PDT 2010


I ran another experiment and made sure the slow (non-fast) isel path was exercised. In the
example below, two of the six arguments have to be passed on the stack, and they must be
placed above the 32-byte shadow area. I updated the patch to do the right thing, and it now
does the same thing as fast isel. The shadow area is simply reserved before any arguments
are analyzed, on both the caller and the callee side.

Example: 
%structType = type <{ <16 x i32> }>

; Callee for the Win64 shadow-area test. It takes six i64 arguments; in the
; generated code shown further down, the first four arrive in registers and the
; last two (%arg4, %arg5) are read from the stack above the shadow area.
; Returns the address of a local %structType plus the sum of all six arguments.
define i64 @myfun(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5) nounwind {
entry:
  ; Local object requested with 64-byte alignment; only its address is used.
  %test.i = alloca %structType, align 64 
  %conv4.i = ptrtoint %structType* %test.i to i64 
  ; Chain of adds so that every argument is actually read by the callee.
  %tmp0 = add i64 %conv4.i, %arg0
  %tmp1 = add i64 %tmp0, %arg1
  %tmp2 = add i64 %tmp1, %arg2
  %tmp3 = add i64 %tmp2, %arg3
  %tmp4 = add i64 %tmp3, %arg4
  %tmp5 = add i64 %tmp4, %arg5
  ret i64 %tmp5
}

; Caller for the Win64 shadow-area test: calls @myfun with six zero arguments
; and returns its result unchanged. Six arguments are used so that the call
; needs outgoing stack slots in addition to the register arguments.
define i64 @myfuncaller() nounwind {

  %temp = call i64 @myfun(i64 0, i64 0, i64 0, i64 0, i64 0, i64 0)
  
  ret i64 %temp
}


Generated code with new patch:

# Callee side. After the prologue, 0(%rbp) holds the saved %rbp and 8(%rbp) the
# return address; the 32-byte shadow area follows at 16(%rbp)..47(%rbp), so the
# first stack-passed argument is found at 48(%rbp).
_myfun:                                 # @myfun
# BB#0:                                 # %entry
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $112, %rsp
        leaq    -112(%rbp,%rcx), %rax   # <-- local address (presumably %test.i) + arg0 in %rcx
        addq    %rdx, %rax              # <-- arg1
        addq    %r8, %rax               # <-- arg2
        addq    %r9, %rax               # <-- arg3
        addq    48(%rbp), %rax          # <-- arg4: 8 (saved rbp) + 8 (return addr) + 32 (shadow area)
        addq    56(%rbp), %rax          # <-- arg5: next 8-byte slot
        addq    $112, %rsp
        popq    %rbp
        ret

        .def     _myfuncaller;  .scl    2;      .type   32;     .endef
        .globl  _myfuncaller
        .align  16, 0x90
# Caller side. The first four (zero) arguments go in %rcx, %rdx, %r8 and %r9;
# the remaining two are stored to the outgoing area just above the 32 bytes of
# shadow space at the bottom of the frame.
_myfuncaller:                           # @myfuncaller
# BB#0:
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $48, %rsp   # <-- 16 bytes arg + 32 bytes shadow area
        xorl    %ecx, %ecx   # <-- arg0 = 0
        xorl    %edx, %edx   # <-- arg1 = 0
        xorl    %r8d, %r8d   # <-- arg2 = 0
        xorl    %r9d, %r9d   # <-- arg3 = 0
        movq    $0, 40(%rsp) # <-- offsets above shadow area
        movq    $0, 32(%rsp)
        call    _myfun
        addq    $48, %rsp
        popq    %rbp
        ret


Index: lib/Target/X86/X86RegisterInfo.cpp
===================================================================
--- lib/Target/X86/X86RegisterInfo.cpp  (revision 106454)
+++ lib/Target/X86/X86RegisterInfo.cpp  (working copy)
@@ -914,9 +914,6 @@
     StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
     MFI->setStackSize(StackSize);
   } else if (Subtarget->isTargetWin64()) {
-    // We need to always allocate 32 bytes as register spill area.
-    // FIXME: We might reuse these 32 bytes for leaf functions.
-    StackSize += 32;
     MFI->setStackSize(StackSize);
   }
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp  (revision 106454)
+++ lib/Target/X86/X86ISelLowering.cpp  (working copy)
@@ -1521,6 +1521,12 @@
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  ArgLocs, *DAG.getContext());
+
+  // Allocate shadow area for Win64
+  if (IsWin64) {  
+    CCInfo.AllocateStack(32, 8); 
+  }
+
   CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
 
   unsigned LastVal = ~0U;
@@ -1746,8 +1752,7 @@
                                     DebugLoc dl, SelectionDAG &DAG,
                                     const CCValAssign &VA,
                                     ISD::ArgFlagsTy Flags) const {
-  const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
-  unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
+  unsigned LocMemOffset = VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   if (Flags.isByVal()) {
@@ -1829,6 +1834,12 @@
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  ArgLocs, *DAG.getContext());
+
+  // Allocate shadow area for Win64
+  if (Subtarget->isTargetWin64()) {  
+    CCInfo.AllocateStack(32, 8); 
+  }
+
   CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
 
   // Get a count of how many bytes are to be pushed on the stack.
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp (revision 106454)
+++ lib/Target/X86/X86TargetMachine.cpp (working copy)
@@ -100,7 +100,7 @@
     DataLayout(Subtarget.getDataLayout()),
     FrameInfo(TargetFrameInfo::StackGrowsDown,
               Subtarget.getStackAlignment(),
-              (Subtarget.isTargetWin64() ? -40 :
+              (Subtarget.isTargetWin64() ? -8 :
                (Subtarget.is64Bit() ? -8 : -4))),
     InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
     ELFWriterInfo(*this) {



More information about the llvm-commits mailing list