[PATCH] D22315: Teach fast isel about thiscall (and callee-pop) calls.
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 13 17:30:10 PDT 2016
rnk added inline comments.
================
Comment at: lib/Target/X86/X86FastISel.cpp:3337
@@ +3336,3 @@
+ TM.Options.GuaranteedTailCallOpt)
+ ? NumBytes // Callee pops everything.
+ : computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
----------------
thakis wrote:
> rnk wrote:
> > I don't think this is true if stack realignment is active. NumBytes is the argument size rounded up to stack alignment. Make a test case with alloca align 32 to see if there's a bug.
> It seems to work (if I understood your example request correctly – this is with a locally-hacked up version that supports stdcall in fast isel as well):
> ```
> Nicos-MBP:llvm-build thakis$ cat test.ll
> source_filename = "fastisel.cc"
> target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
> target triple = "i386-pc-windows-msvc18.0.0"
>
> define void @myfun() #0 {
> entry:
> %i = alloca i32, align 32
> store i32 43, i32* %i, align 4
> %0 = load i32, i32* %i, align 4
> call x86_stdcallcc void @stdcallfun(i32 %0)
> ret void
> }
>
> declare x86_stdcallcc void @stdcallfun(i32) #1
> Nicos-MBP:llvm-build thakis$ bin/llc -no-x86-call-frame-opt -O0 -fast-isel-verbose < test.ll
> ...
> _myfun: # @myfun
> # BB#0: # %entry
> pushl %ebp
> movl %esp, %ebp
> andl $-32, %esp
> subl $64, %esp
> movl $43, 32(%esp)
> movl 32(%esp), %eax
> movl %eax, (%esp)
> calll _stdcallfun at 4
> movl %ebp, %esp
> popl %ebp
> retl
> ```
>
> This matches what X86ISelLowering.cpp does, and I built a few test binaries with this that worked too. Intuitively, I think it works because the call frame is aligned as much as the parameters are, not as much as the arguments are – and the callee-pop pops the parameter size.
I see, CCInfo.getAlignedCallFrameSize() doesn't round up to stack alignment, it rounds up to the alignment of the parameters. I was concerned about what when the outgoing stack alignment is 16 instead of 4, and we need to emit cleanup adjustments. This probably only happens on Linux, so this is something like the test case I was thinking about:
```
$ cat t.ll
target triple = "i686-linux"
declare x86_stdcallcc void @stdcallfun(i32*)
define void @myfun(i32 %n) {
entry:
%i = alloca i32
call x86_stdcallcc void @stdcallfun(i32* %i)
%j = alloca i32, i32 %n
call x86_stdcallcc void @stdcallfun(i32* %j)
store volatile i32 0, i32* %i
ret void
}
$ llc t.ll -o - -mtriple=i686-linux -no-x86-call-frame-opt
...
subl $16, %esp
leal -8(%ebp), %eax
movl %eax, (%esp)
calll stdcallfun
addl $12, %esp
movl %esp, %eax
leal 15(,%esi,4), %ecx
andl $-16, %ecx
subl %ecx, %eax
movl %eax, %esp
subl $16, %esp # ADJCALLSTACKDOWN uses 16 bytes
movl %eax, (%esp)
calll stdcallfun
addl $12, %esp # The callee only pops 4 bytes, not 16, so we need to add 12 to esp
...
```
================
Comment at: lib/Target/X86/X86FastISel.cpp:3338
@@ -3340,1 +3337,3 @@
+ ? NumBytes // Callee pops everything.
+ : computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
----------------
This computeBytesPoppedByCallee function only handles popping hidden sret parameters, which has to be one of the weirdest 32-bit ABI corner cases ever. Maybe rename it computeBytesPoppedByCalleeForSRet?
================
Comment at: test/CodeGen/X86/fast-isel-call.ll:59
@@ +58,3 @@
+; STDERR-NOT: FastISel missed call: call x86_thiscallcc void @thiscallfun
+; CHECK: test5:
+%struct.S = type { i8 }
----------------
We should check for correct code
================
Comment at: test/CodeGen/X86/fast-isel-call.ll:64
@@ +63,3 @@
+ %s = alloca %struct.S, align 1
+ call x86_thiscallcc void @thiscallfun(%struct.S* %s, i32 43)
+ ret void
----------------
If you add a store to an alloca after the call, it will ensure that the call sequence end stack adjustment isn't eliminated as dead code.
================
Comment at: test/CodeGen/X86/fast-isel-call.ll:66
@@ +65,3 @@
+ ret void
+}
+declare x86_thiscallcc void @thiscallfun(%struct.S*, i32) #1
----------------
I would suggest testing with dynamic allocas, but fastisel can't handle them. =/
http://reviews.llvm.org/D22315
More information about the llvm-commits
mailing list