[llvm] c089f02 - [X86] Don't setup and teardown memory for a musttail call

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 6 13:09:39 PST 2019


Author: Reid Kleckner
Date: 2019-12-06T12:58:54-08:00
New Revision: c089f0289856b8f72b06c30daa7848e431c8e36e

URL: https://github.com/llvm/llvm-project/commit/c089f0289856b8f72b06c30daa7848e431c8e36e
DIFF: https://github.com/llvm/llvm-project/commit/c089f0289856b8f72b06c30daa7848e431c8e36e.diff

LOG: [X86] Don't setup and teardown memory for a musttail call

Summary:
musttail calls should not require allocating extra stack for arguments.
Updates to arguments passed in memory should happen in place before the
epilogue.

This bug was mostly a missed optimization, unless inalloca was used and
the store-to-push conversion fired.

If a reserved call frame was used for an inalloca musttail call, the
call setup and teardown instructions would be deleted, and SP
adjustments would be inserted in the prologue and epilogue. You can see
these are removed from several test cases in this change.

In the case where the stack frame was not reserved, i.e. call frame
optimization fires and turns argument stores into pushes, then the
imbalanced call frame setup instructions created for inalloca calls
become a problem. They remain in the instruction stream, resulting in a
call setup that allocates zero bytes (expected for inalloca), and a call
teardown that deallocates the inalloca pack. This deallocation was
unbalanced, leading to subsequent crashes.

Reviewers: hans

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71097

Added: 
    llvm/test/CodeGen/X86/musttail-inalloca.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/cfguard-checks.ll
    llvm/test/CodeGen/X86/musttail-tailcc.ll
    llvm/test/CodeGen/X86/musttail-varargs.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4dc2ff481785..bac8838a0adc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3808,7 +3808,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                          "the only memory argument");
   }
 
-  if (!IsSibcall)
+  if (!IsSibcall && !IsMustTail)
     Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
                                  NumBytes - NumBytesToPush, dl);
 
@@ -4093,7 +4093,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
 
-  if (!IsSibcall && isTailCall) {
+  if (!IsSibcall && isTailCall && !IsMustTail) {
     Chain = DAG.getCALLSEQ_END(Chain,
                                DAG.getIntPtrConstant(NumBytesToPop, dl, true),
                                DAG.getIntPtrConstant(0, dl, true), InFlag, dl);

diff --git a/llvm/test/CodeGen/X86/cfguard-checks.ll b/llvm/test/CodeGen/X86/cfguard-checks.ll
index efd6e7bc11b6..6e8d71411e82 100644
--- a/llvm/test/CodeGen/X86/cfguard-checks.ll
+++ b/llvm/test/CodeGen/X86/cfguard-checks.ll
@@ -171,8 +171,7 @@ entry:
 
   ; X64-LABEL: func_cf_tail
   ; X64:       leaq	target_func(%rip), %rax
-  ; X64:       movq __guard_dispatch_icall_fptr(%rip), %rcx
-  ; X64:       rex64 jmpq *%rcx            # TAILCALL
+  ; X64:       rex64 jmpq *__guard_dispatch_icall_fptr(%rip)         # TAILCALL
   ; X64-NOT:   callq
 }
 
@@ -205,7 +204,6 @@ entry:
   ; X64:            movq (%rcx), %rax
   ; X64-NEXT:       movq 8(%rax), %rax
   ; X64-NEXT:       movq __guard_dispatch_icall_fptr(%rip), %rdx
-  ; X64-NEXT:       addq $40, %rsp
   ; X64-NEXT:       rex64 jmpq *%rdx            # TAILCALL
   ; X64-NOT:   callq
 }

diff --git a/llvm/test/CodeGen/X86/musttail-inalloca.ll b/llvm/test/CodeGen/X86/musttail-inalloca.ll
new file mode 100644
index 000000000000..c17d29c4c71d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/musttail-inalloca.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - | FileCheck %s
+
+; Previously, we would accidentally leave behind SP adjustments to setup a call
+; frame for the musttail call target, and SP adjustments would end up
+; unbalanced. Reported as https://crbug.com/1026882.
+
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.16.0"
+
+; 20 bytes of memory.
+%struct.Args = type { i32, i32, i32, i32, i32 }
+
+declare dso_local x86_thiscallcc void @methodWithVtorDisp(i8* nocapture readonly, <{ %struct.Args }>* inalloca)
+
+define dso_local x86_thiscallcc void @methodWithVtorDisp_thunk(i8* %0, <{ %struct.Args }>* inalloca %1) #0 {
+; CHECK-LABEL: methodWithVtorDisp_thunk:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    movl %ecx, %esi
+; CHECK-NEXT:    subl -4(%ecx), %esi
+; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK-NEXT:    pushl $_methodWithVtorDisp_thunk
+; CHECK-NEXT:    calll ___cyg_profile_func_exit
+; CHECK-NEXT:    addl $8, %esp
+; CHECK-NEXT:    movl %esi, %ecx
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    jmp _methodWithVtorDisp # TAILCALL
+  %3 = getelementptr inbounds i8, i8* %0, i32 -4
+  %4 = bitcast i8* %3 to i32*
+  %5 = load i32, i32* %4, align 4
+  %6 = sub i32 0, %5
+  %7 = getelementptr i8, i8* %0, i32 %6
+  musttail call x86_thiscallcc void @methodWithVtorDisp(i8* %7, <{ %struct.Args }>* inalloca nonnull %1)
+  ret void
+}
+
+attributes #0 = { nounwind optsize "instrument-function-exit-inlined"="__cyg_profile_func_exit"  }

diff --git a/llvm/test/CodeGen/X86/musttail-tailcc.ll b/llvm/test/CodeGen/X86/musttail-tailcc.ll
index 6057045a77df..4de8d4e2be12 100644
--- a/llvm/test/CodeGen/X86/musttail-tailcc.ll
+++ b/llvm/test/CodeGen/X86/musttail-tailcc.ll
@@ -9,8 +9,6 @@ declare tailcc i32 @tailcallee(i32 %a1, i32 %a2)
 define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
 ; X64-LABEL: tailcaller:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    jmp tailcallee # TAILCALL
 ;
 ; X32-LABEL: tailcaller:
@@ -26,8 +24,6 @@ declare tailcc i8* @alias_callee()
 define tailcc noalias i8* @noalias_caller() nounwind {
 ; X64-LABEL: noalias_caller:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    jmp alias_callee # TAILCALL
 ;
 ; X32-LABEL: noalias_caller:
@@ -42,8 +38,6 @@ declare tailcc noalias i8* @noalias_callee()
 define tailcc i8* @alias_caller() nounwind {
 ; X64-LABEL: alias_caller:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    jmp noalias_callee # TAILCALL
 ;
 ; X32-LABEL: alias_caller:
@@ -56,25 +50,17 @@ define tailcc i8* @alias_caller() nounwind {
 define tailcc void @void_test(i32, i32, i32, i32) {
 ; X64-LABEL: void_test:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    .cfi_def_cfa_offset 16
-; X64-NEXT:    popq %rax
-; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    jmp void_test # TAILCALL
 ;
 ; X32-LABEL: void_test:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    .cfi_offset %esi, -8
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT:    addl $8, %esp
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    jmp void_test # TAILCALL
@@ -86,25 +72,17 @@ define tailcc void @void_test(i32, i32, i32, i32) {
 define tailcc i1 @i1test(i32, i32, i32, i32) {
 ; X64-LABEL: i1test:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    .cfi_def_cfa_offset 16
-; X64-NEXT:    popq %rax
-; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    jmp i1test # TAILCALL
 ;
 ; X32-LABEL: i1test:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    .cfi_offset %esi, -8
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT:    addl $8, %esp
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    jmp i1test # TAILCALL

diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
index 5f5ce5f9af91..5d88f0dd11cd 100644
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -246,12 +246,13 @@ define void @f_thunk(i8* %this, ...) {
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    andl $-16, %esp
-; X86-NOSSE-NEXT:    subl $48, %esp
+; X86-NOSSE-NEXT:    subl $32, %esp
 ; X86-NOSSE-NEXT:    movl 8(%ebp), %esi
 ; X86-NOSSE-NEXT:    leal 12(%ebp), %eax
-; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %esi, (%esp)
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    calll _get_f
+; X86-NOSSE-NEXT:    addl $4, %esp
 ; X86-NOSSE-NEXT:    movl %esi, 8(%ebp)
 ; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
 ; X86-NOSSE-NEXT:    popl %esi
@@ -264,17 +265,18 @@ define void @f_thunk(i8* %this, ...) {
 ; X86-SSE-NEXT:    movl %esp, %ebp
 ; X86-SSE-NEXT:    pushl %esi
 ; X86-SSE-NEXT:    andl $-16, %esp
-; X86-SSE-NEXT:    subl $96, %esp
+; X86-SSE-NEXT:    subl $80, %esp
 ; X86-SSE-NEXT:    movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X86-SSE-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X86-SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE-NEXT:    movaps %xmm0, (%esp) # 16-byte Spill
 ; X86-SSE-NEXT:    movl 8(%ebp), %esi
 ; X86-SSE-NEXT:    leal 12(%ebp), %eax
 ; X86-SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    movl %esi, (%esp)
+; X86-SSE-NEXT:    pushl %esi
 ; X86-SSE-NEXT:    calll _get_f
+; X86-SSE-NEXT:    addl $4, %esp
 ; X86-SSE-NEXT:    movl %esi, 8(%ebp)
-; X86-SSE-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-SSE-NEXT:    movaps (%esp), %xmm0 # 16-byte Reload
 ; X86-SSE-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
 ; X86-SSE-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
 ; X86-SSE-NEXT:    leal -4(%ebp), %esp
@@ -300,38 +302,21 @@ define void @f_thunk(i8* %this, ...) {
 define void @g_thunk(i8* %fptr_i8, ...) {
 ; LINUX-LABEL: g_thunk:
 ; LINUX:       # %bb.0:
-; LINUX-NEXT:    pushq %rax
-; LINUX-NEXT:    .cfi_def_cfa_offset 16
-; LINUX-NEXT:    popq %r11
-; LINUX-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-NEXT:    jmpq *%rdi # TAILCALL
 ;
 ; LINUX-X32-LABEL: g_thunk:
 ; LINUX-X32:       # %bb.0:
-; LINUX-X32-NEXT:    pushq %rax
-; LINUX-X32-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-X32-NEXT:    movl %edi, %r11d
-; LINUX-X32-NEXT:    addl $8, %esp
-; LINUX-X32-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; WINDOWS-LABEL: g_thunk:
 ; WINDOWS:       # %bb.0:
-; WINDOWS-NEXT:    subq $40, %rsp
-; WINDOWS-NEXT:    .seh_stackalloc 40
-; WINDOWS-NEXT:    .seh_endprologue
-; WINDOWS-NEXT:    addq $40, %rsp
 ; WINDOWS-NEXT:    rex64 jmpq *%rcx # TAILCALL
-; WINDOWS-NEXT:    .seh_handlerdata
-; WINDOWS-NEXT:    .text
-; WINDOWS-NEXT:    .seh_endproc
 ;
 ; X86-LABEL: g_thunk:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    popl %ecx
 ; X86-NEXT:    jmpl *%eax # TAILCALL
   %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
   musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
@@ -347,78 +332,53 @@ define void @g_thunk(i8* %fptr_i8, ...) {
 define void @h_thunk(%struct.Foo* %this, ...) {
 ; LINUX-LABEL: h_thunk:
 ; LINUX:       # %bb.0:
-; LINUX-NEXT:    pushq %rax
-; LINUX-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-NEXT:    cmpb $1, (%rdi)
 ; LINUX-NEXT:    jne .LBB2_2
 ; LINUX-NEXT:  # %bb.1: # %then
 ; LINUX-NEXT:    movq 8(%rdi), %r11
-; LINUX-NEXT:    addq $8, %rsp
-; LINUX-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-NEXT:    jmpq *%r11 # TAILCALL
 ; LINUX-NEXT:  .LBB2_2: # %else
-; LINUX-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-NEXT:    movq 16(%rdi), %r11
 ; LINUX-NEXT:    movl $42, {{.*}}(%rip)
-; LINUX-NEXT:    addq $8, %rsp
-; LINUX-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; LINUX-X32-LABEL: h_thunk:
 ; LINUX-X32:       # %bb.0:
-; LINUX-X32-NEXT:    pushq %rax
-; LINUX-X32-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-X32-NEXT:    cmpb $1, (%edi)
 ; LINUX-X32-NEXT:    jne .LBB2_2
 ; LINUX-X32-NEXT:  # %bb.1: # %then
 ; LINUX-X32-NEXT:    movl 4(%edi), %r11d
-; LINUX-X32-NEXT:    addl $8, %esp
-; LINUX-X32-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ; LINUX-X32-NEXT:  .LBB2_2: # %else
-; LINUX-X32-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-X32-NEXT:    movl 8(%edi), %r11d
 ; LINUX-X32-NEXT:    movl $42, {{.*}}(%rip)
-; LINUX-X32-NEXT:    addl $8, %esp
-; LINUX-X32-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; WINDOWS-LABEL: h_thunk:
 ; WINDOWS:       # %bb.0:
-; WINDOWS-NEXT:    subq $40, %rsp
-; WINDOWS-NEXT:    .seh_stackalloc 40
-; WINDOWS-NEXT:    .seh_endprologue
 ; WINDOWS-NEXT:    cmpb $1, (%rcx)
 ; WINDOWS-NEXT:    jne .LBB2_2
 ; WINDOWS-NEXT:  # %bb.1: # %then
 ; WINDOWS-NEXT:    movq 8(%rcx), %rax
-; WINDOWS-NEXT:    addq $40, %rsp
 ; WINDOWS-NEXT:    rex64 jmpq *%rax # TAILCALL
 ; WINDOWS-NEXT:  .LBB2_2: # %else
 ; WINDOWS-NEXT:    movq 16(%rcx), %rax
 ; WINDOWS-NEXT:    movl $42, {{.*}}(%rip)
-; WINDOWS-NEXT:    addq $40, %rsp
 ; WINDOWS-NEXT:    rex64 jmpq *%rax # TAILCALL
-; WINDOWS-NEXT:    .seh_handlerdata
-; WINDOWS-NEXT:    .text
-; WINDOWS-NEXT:    .seh_endproc
 ;
 ; X86-LABEL: h_thunk:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    cmpb $1, (%eax)
 ; X86-NEXT:    jne LBB2_2
 ; X86-NEXT:  # %bb.1: # %then
 ; X86-NEXT:    movl 4(%eax), %ecx
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    popl %eax
 ; X86-NEXT:    jmpl *%ecx # TAILCALL
 ; X86-NEXT:  LBB2_2: # %else
 ; X86-NEXT:    movl 8(%eax), %ecx
 ; X86-NEXT:    movl $42, _g
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    popl %eax
 ; X86-NEXT:    jmpl *%ecx # TAILCALL
   %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
   %cond = load i1, i1* %cond_p


        


More information about the llvm-commits mailing list