[LLVMdev] Win64 Calling Convention problem

Thu Dec 3 08:57:30 PST 2009

Hi!

I have discovered a problem with LLVM's interpretation of the Win64
calling convention with respect to passing aggregates as arguments. The
following code is part of my host application, which is compiled with
Visual Studio 2005 in 64-bit debug mode. noise4 expects a structure of
four floats (16 bytes) as its first and only argument. Because the
structure is not 1, 2, 4, or 8 bytes in size, it is - in accordance
with the specs of the Win64 calling convention - passed by pointer: the
callee receives the address of the structure in RCX.

--- snip ---
struct float4 { float x, y, z, w; }

float noise4(float4 v)
{
0000000140067AE0  mov         qword ptr [rsp+8],rcx
0000000140067AE5  push        rdi
0000000140067AE6  sub         rsp,10h
0000000140067AEA  mov         rdi,rsp
0000000140067AED  mov         rcx,4
0000000140067AF7  mov         eax,0CCCCCCCCh
0000000140067AFC  rep stos    dword ptr [rdi]
0000000140067AFE  mov         rcx,qword ptr [rsp+20h]
	return v.x + v.y;
0000000140067B03  mov         rax,qword ptr [v]
0000000140067B08  mov         rcx,qword ptr [v]
0000000140067B0D  movss       xmm0,dword ptr [rax]
0000000140067B11  addss       xmm0,dword ptr [rcx+4]
0000000140067B16  add         rsp,10h
0000000140067B1A  pop         rdi
0000000140067B1B  ret
}
--- snip ---

noise4 is supposed to be called by JIT-compiled LLVM code, just like in
the following example (where it is declared as noise$float4).

--- snip ---
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64"
target triple = "x86_64-pc-win32"

%0 = type opaque
%float4 = type { float, float, float, float }

define void @main(%float4* noalias nocapture, %0* noalias nocapture) nounwind {
  %3 = call float @"noise$float4"(%float4 { float 1.000000e+000, float 2.000000e+000, float 3.000000e+000, float 4.000000e+000 }) ; <float> [#uses=4]
  %4 = insertvalue %float4 undef, float %3, 0     ; <%float4> [#uses=1]
  %5 = insertvalue %float4 %4, float %3, 1        ; <%float4> [#uses=1]
  %6 = insertvalue %float4 %5, float %3, 2        ; <%float4> [#uses=1]
  %7 = insertvalue %float4 %6, float %3, 3        ; <%float4> [#uses=1]
  store %float4 %7, %float4* %0
  ret void
}

declare float @"noise$float4"(%float4) nounwind readnone
--- snip ---

When I compile this module with llc (Intel assembler syntax), I get the
following code. As you can see, the float4 argument is not passed to
noise$float4 by pointer. Instead, the function is called as if it
expected four individual floats as arguments, which are passed in the
registers XMM0-XMM3.

--- snip ---
	.data
	ALIGN	4
$CPI1_0:                                                    ; constant float
	dd	1065353216                                  ; float 1.000000e+000
$CPI1_1:                                                    ; constant float
	dd	1073741824                                  ; float 2.000000e+000
$CPI1_2:                                                    ; constant float
	dd	1077936128                                  ; float 3.000000e+000
$CPI1_3:                                                    ; constant float
	dd	1082130432                                  ; float 4.000000e+000
	.text
	ALIGN	16
	.globl	_main
_main:                                                      ; @main
; BB#0:
	sub	RSP, 40
	mov	QWORD PTR [RSP + 32], RSI                   ; Spill
	mov	RSI, RCX
	movss	XMM0, DWORD PTR [RIP + ($CPI1_0)]
	movss	XMM1, DWORD PTR [RIP + ($CPI1_1)]
	movss	XMM2, DWORD PTR [RIP + ($CPI1_2)]
	movss	XMM3, DWORD PTR [RIP + ($CPI1_3)]
	call	_noise$float4
	movss	DWORD PTR [RSI + 12], XMM0
	movss	DWORD PTR [RSI + 8], XMM0
	movss	DWORD PTR [RSI + 4], XMM0
	movss	DWORD PTR [RSI], XMM0
	mov	RSI, QWORD PTR [RSP + 32]                   ; Reload
	add	RSP, 40
	ret
--- snip ---

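This clearly doesn't work: the callee dereferences RCX, expecting it to
hold the address of the float4 argument, but the generated code never
loads that address into RCX before the call. I'd be glad if someone
could look into this issue. Other than that, I'm pleased to say that my
experiences with 64-bit code generation on Windows have been very
positive. Great job!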

Best regards,
Stephan