[PATCH] D22315: Teach fast isel about thiscall (and callee-pop) calls.

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 13 17:30:10 PDT 2016


rnk added inline comments.

================
Comment at: lib/Target/X86/X86FastISel.cpp:3337
@@ +3336,3 @@
+                       TM.Options.GuaranteedTailCallOpt)
+          ? NumBytes // Callee pops everything.
+          : computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
----------------
thakis wrote:
> rnk wrote:
> > I don't think this is true if stack realignment is active. NumBytes is the argument size rounded up to stack alignment. Make a test case with alloca align 32 to see if there's a bug.
> It seems to work (if I understood your example request correctly – this is with a locally-hacked up version that supports stdcall in fast isel as well):
> ```
> Nicos-MBP:llvm-build thakis$ cat test.ll
> source_filename = "fastisel.cc"
> target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
> target triple = "i386-pc-windows-msvc18.0.0"
> 
> define void @myfun() #0 {
> entry:
>   %i = alloca i32, align 32
>   store i32 43, i32* %i, align 4
>   %0 = load i32, i32* %i, align 4
>   call x86_stdcallcc void @stdcallfun(i32 %0)
>   ret void
> }
> 
> declare x86_stdcallcc void @stdcallfun(i32) #1
> Nicos-MBP:llvm-build thakis$ bin/llc -no-x86-call-frame-opt  -O0 -fast-isel-verbose < test.ll
> ...
> _myfun:                                 # @myfun
> # BB#0:                                 # %entry
> 	pushl	%ebp
> 	movl	%esp, %ebp
> 	andl	$-32, %esp
> 	subl	$64, %esp
> 	movl	$43, 32(%esp)
> 	movl	32(%esp), %eax
> 	movl	%eax, (%esp)
> 	calll	_stdcallfun at 4
> 	movl	%ebp, %esp
> 	popl	%ebp
> 	retl
> ```
> 
> This matches what X86ISelLowering.cpp does, and I built a few test binaries with this that worked too. Intuitively, I think it works because the call frame is aligned as much as the parameters are, not as much as the arguments are – and the callee-pop pops the parameter size.
I see, CCInfo.getAlignedCallFrameSize() doesn't round up to stack alignment, it rounds up to the alignment of the parameters. I was concerned about what when the outgoing stack alignment is 16 instead of 4, and we need to emit cleanup adjustments. This probably only happens on Linux, so this is something like the test case I was thinking about:
```
$ cat t.ll
target triple = "i686-linux"
declare x86_stdcallcc void @stdcallfun(i32*)
define void @myfun(i32 %n) {
entry:
  %i = alloca i32
  call x86_stdcallcc void @stdcallfun(i32* %i)
  %j = alloca i32, i32 %n
  call x86_stdcallcc void @stdcallfun(i32* %j)
  store volatile i32 0, i32* %i
  ret void
}

$ llc t.ll -o - -mtriple=i686-linux -no-x86-call-frame-opt
...
        subl    $16, %esp
        leal    -8(%ebp), %eax
        movl    %eax, (%esp)
        calll   stdcallfun
        addl    $12, %esp

        movl    %esp, %eax
        leal    15(,%esi,4), %ecx
        andl    $-16, %ecx
        subl    %ecx, %eax
        movl    %eax, %esp

        subl    $16, %esp  # ADJCALLSTACKDOWN uses 16 bytes
        movl    %eax, (%esp)
        calll   stdcallfun
        addl    $12, %esp    # The callee only pops 4 bytes, not 16, so we need to add 12 to esp
...
```

================
Comment at: lib/Target/X86/X86FastISel.cpp:3338
@@ -3340,1 +3337,3 @@
+          ? NumBytes // Callee pops everything.
+          : computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
----------------
This computeBytesPoppedByCallee function only handles popping hidden sret parameters, which has to be one of the weirdest 32-bit ABI corner cases ever. Maybe rename it computeBytesPoppedByCalleeForSRet?

================
Comment at: test/CodeGen/X86/fast-isel-call.ll:59
@@ +58,3 @@
+; STDERR-NOT: FastISel missed call:   call x86_thiscallcc void @thiscallfun
+; CHECK: test5:
+%struct.S = type { i8 }
----------------
We should check for correct code

================
Comment at: test/CodeGen/X86/fast-isel-call.ll:64
@@ +63,3 @@
+  %s = alloca %struct.S, align 1
+  call x86_thiscallcc void @thiscallfun(%struct.S* %s, i32 43)
+  ret void
----------------
If you add a store to an alloca after the call, it will ensure that the call sequence end stack adjustment isn't eliminated as dead code.

================
Comment at: test/CodeGen/X86/fast-isel-call.ll:66
@@ +65,3 @@
+  ret void
+}
+declare x86_thiscallcc void @thiscallfun(%struct.S*, i32) #1
----------------
I would suggest testing with dynamic allocas, but fastisel can't handle them. =/


http://reviews.llvm.org/D22315





More information about the llvm-commits mailing list