[llvm] r327957 - [X86] Properly implement the calling convention for f80 for mingw/x86_64

Mon Mar 19 23:19:39 PDT 2018

Author: mstorsjo
Date: Mon Mar 19 23:19:38 2018
New Revision: 327957

URL: http://llvm.org/viewvc/llvm-project?rev=327957&view=rev
Log:
[X86] Properly implement the calling convention for f80 for mingw/x86_64

In these cases, both parameters and return values are passed
as a pointer to a stack allocation.

MSVC doesn't use the f80 data type at all, while it is used
for long doubles on mingw.

Normally, this part of the calling convention is handled
within clang, but for intrinsics that are lowered to libcalls,
it may need to be handled within llvm as well.

Differential Revision: https://reviews.llvm.org/D44592

Added:
    llvm/trunk/test/CodeGen/X86/win64-long-double.ll
Modified:
    llvm/trunk/lib/Target/X86/X86CallingConv.td
    llvm/trunk/test/CodeGen/X86/finite-libcalls.ll
    llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=327957&r1=327956&r2=327957&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td Mon Mar 19 23:19:38 2018
@@ -18,6 +18,12 @@ class CCIfSubtarget<string F, CCAction A
                        "(State.getMachineFunction().getSubtarget()).", F),
            A>;
 
+/// CCIfNotSubtarget - Match if the current subtarget doesn't have feature F.
+class CCIfNotSubtarget<string F, CCAction A>
+    : CCIf<!strconcat("!static_cast<const X86Subtarget&>"
+                       "(State.getMachineFunction().getSubtarget()).", F),
+           A>;
+
 // Register classes for RegCall
 class RC_X86_RegCall {
   list<Register> GPR_8 = [];
@@ -246,8 +252,9 @@ def RetCC_X86Common : CallingConv<[
   // MM0, it doesn't support these vector types.
   CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
 
-  // Long double types are always returned in FP0 (even with SSE).
-  CCIfType<[f80], CCAssignToReg<[FP0, FP1]>>
+  // Long double types are always returned in FP0 (even with SSE),
+  // except on Win64.
+  CCIfNotSubtarget<"isTargetWin64()", CCIfType<[f80], CCAssignToReg<[FP0, FP1]>>>
 ]>;
 
 // X86-32 C return-value convention.
@@ -605,6 +612,9 @@ def CC_X86_Win64_C : CallingConv<[
   // 512 bit vectors are passed by pointer
   CCIfType<[v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
 
+  // Long doubles are passed by pointer
+  CCIfType<[f80], CCPassIndirect<i64>>,
+
   // The first 4 MMX vector arguments are passed in GPRs.
   CCIfType<[x86mmx], CCBitConvertToType<i64>>,
 
@@ -628,11 +638,7 @@ def CC_X86_Win64_C : CallingConv<[
 
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
-  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
-  // Long doubles get stack slots whose size and alignment depends on the
-  // subtarget.
-  CCIfType<[f80], CCAssignToStack<0, 0>>
+  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
 ]>;
 
 def CC_X86_Win64_VectorCall : CallingConv<[

Modified: llvm/trunk/test/CodeGen/X86/finite-libcalls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/finite-libcalls.ll?rev=327957&r1=327956&r2=327957&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/finite-libcalls.ll (original)
+++ llvm/trunk/test/CodeGen/X86/finite-libcalls.ll Mon Mar 19 23:19:38 2018
@@ -50,11 +50,19 @@ define x86_fp80 @exp_f80(x86_fp80 %x) #0
 ;
 ; WIN-LABEL: exp_f80:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    fldt (%rdx)
 ; WIN-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq expl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    fstpt (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; MAC-LABEL: exp_f80:
@@ -113,11 +121,19 @@ define x86_fp80 @exp2_f80(x86_fp80 %x) #
 ;
 ; WIN-LABEL: exp2_f80:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    fldt (%rdx)
 ; WIN-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq exp2l
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    fstpt (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; MAC-LABEL: exp2_f80:
@@ -176,11 +192,19 @@ define x86_fp80 @log_f80(x86_fp80 %x) #0
 ;
 ; WIN-LABEL: log_f80:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    fldt (%rdx)
 ; WIN-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq logl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    fstpt (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; MAC-LABEL: log_f80:
@@ -239,11 +263,19 @@ define x86_fp80 @log2_f80(x86_fp80 %x) #
 ;
 ; WIN-LABEL: log2_f80:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    fldt (%rdx)
 ; WIN-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq log2l
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    fstpt (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; MAC-LABEL: log2_f80:
@@ -302,11 +334,19 @@ define x86_fp80 @log10_f80(x86_fp80 %x)
 ;
 ; WIN-LABEL: log10_f80:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    fldt (%rdx)
 ; WIN-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq log10l
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    fstpt (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; MAC-LABEL: log10_f80:
@@ -373,13 +413,22 @@ define x86_fp80 @pow_f80(x86_fp80 %x) #0
 ;
 ; WIN-LABEL: pow_f80:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    fldt (%rdx)
 ; WIN-NEXT:    fld %st(0)
 ; WIN-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq powl
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT:    fstpt (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; MAC-LABEL: pow_f80:

Modified: llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll?rev=327957&r1=327956&r2=327957&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll Mon Mar 19 23:19:38 2018
@@ -918,7 +918,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwi
 ; AVX512_64_WIN-LABEL: x_to_u64:
 ; AVX512_64_WIN:       # %bb.0:
 ; AVX512_64_WIN-NEXT:    pushq %rax
-; AVX512_64_WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; AVX512_64_WIN-NEXT:    fldt (%rcx)
 ; AVX512_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
 ; AVX512_64_WIN-NEXT:    fld %st(1)
 ; AVX512_64_WIN-NEXT:    fsub %st(1)
@@ -1003,25 +1003,25 @@ define i64 @x_to_u64(x86_fp80 %a) nounwi
 ;
 ; SSE3_64_WIN-LABEL: x_to_u64:
 ; SSE3_64_WIN:       # %bb.0:
-; SSE3_64_WIN-NEXT:    subq $24, %rsp
-; SSE3_64_WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; SSE3_64_WIN-NEXT:    subq $16, %rsp
+; SSE3_64_WIN-NEXT:    fldt (%rcx)
 ; SSE3_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
 ; SSE3_64_WIN-NEXT:    fld %st(1)
 ; SSE3_64_WIN-NEXT:    fsub %st(1)
 ; SSE3_64_WIN-NEXT:    fisttpll {{[0-9]+}}(%rsp)
 ; SSE3_64_WIN-NEXT:    fld %st(1)
-; SSE3_64_WIN-NEXT:    fisttpll {{[0-9]+}}(%rsp)
+; SSE3_64_WIN-NEXT:    fisttpll (%rsp)
 ; SSE3_64_WIN-NEXT:    fucompi %st(1)
 ; SSE3_64_WIN-NEXT:    fstp %st(0)
 ; SSE3_64_WIN-NEXT:    jbe .LBB4_1
 ; SSE3_64_WIN-NEXT:  # %bb.2:
-; SSE3_64_WIN-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; SSE3_64_WIN-NEXT:    addq $24, %rsp
+; SSE3_64_WIN-NEXT:    movq (%rsp), %rax
+; SSE3_64_WIN-NEXT:    addq $16, %rsp
 ; SSE3_64_WIN-NEXT:    retq
 ; SSE3_64_WIN-NEXT:  .LBB4_1:
 ; SSE3_64_WIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
 ; SSE3_64_WIN-NEXT:    xorq {{[0-9]+}}(%rsp), %rax
-; SSE3_64_WIN-NEXT:    addq $24, %rsp
+; SSE3_64_WIN-NEXT:    addq $16, %rsp
 ; SSE3_64_WIN-NEXT:    retq
 ;
 ; SSE3_64_LIN-LABEL: x_to_u64:
@@ -1103,7 +1103,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwi
 ; SSE2_64_WIN-LABEL: x_to_u64:
 ; SSE2_64_WIN:       # %bb.0:
 ; SSE2_64_WIN-NEXT:    subq $24, %rsp
-; SSE2_64_WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; SSE2_64_WIN-NEXT:    fldt (%rcx)
 ; SSE2_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
 ; SSE2_64_WIN-NEXT:    fld %st(1)
 ; SSE2_64_WIN-NEXT:    fsub %st(1)
@@ -1268,7 +1268,7 @@ define i64 @x_to_s64(x86_fp80 %a) nounwi
 ; AVX512_64_WIN-LABEL: x_to_s64:
 ; AVX512_64_WIN:       # %bb.0:
 ; AVX512_64_WIN-NEXT:    pushq %rax
-; AVX512_64_WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; AVX512_64_WIN-NEXT:    fldt (%rcx)
 ; AVX512_64_WIN-NEXT:    fisttpll (%rsp)
 ; AVX512_64_WIN-NEXT:    movq (%rsp), %rax
 ; AVX512_64_WIN-NEXT:    popq %rcx
@@ -1308,7 +1308,7 @@ define i64 @x_to_s64(x86_fp80 %a) nounwi
 ; SSE3_64_WIN-LABEL: x_to_s64:
 ; SSE3_64_WIN:       # %bb.0:
 ; SSE3_64_WIN-NEXT:    pushq %rax
-; SSE3_64_WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; SSE3_64_WIN-NEXT:    fldt (%rcx)
 ; SSE3_64_WIN-NEXT:    fisttpll (%rsp)
 ; SSE3_64_WIN-NEXT:    movq (%rsp), %rax
 ; SSE3_64_WIN-NEXT:    popq %rcx
@@ -1359,8 +1359,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwi
 ;
 ; SSE2_64_WIN-LABEL: x_to_s64:
 ; SSE2_64_WIN:       # %bb.0:
-; SSE2_64_WIN-NEXT:    subq $24, %rsp
-; SSE2_64_WIN-NEXT:    fldt {{[0-9]+}}(%rsp)
+; SSE2_64_WIN-NEXT:    subq $16, %rsp
+; SSE2_64_WIN-NEXT:    fldt (%rcx)
 ; SSE2_64_WIN-NEXT:    fnstcw {{[0-9]+}}(%rsp)
 ; SSE2_64_WIN-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
 ; SSE2_64_WIN-NEXT:    movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
@@ -1369,7 +1369,7 @@ define i64 @x_to_s64(x86_fp80 %a) nounwi
 ; SSE2_64_WIN-NEXT:    fistpll {{[0-9]+}}(%rsp)
 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
 ; SSE2_64_WIN-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; SSE2_64_WIN-NEXT:    addq $24, %rsp
+; SSE2_64_WIN-NEXT:    addq $16, %rsp
 ; SSE2_64_WIN-NEXT:    retq
 ;
 ; SSE2_64_LIN-LABEL: x_to_s64:

Added: llvm/trunk/test/CodeGen/X86/win64-long-double.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win64-long-double.ll?rev=327957&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win64-long-double.ll (added)
+++ llvm/trunk/test/CodeGen/X86/win64-long-double.ll Mon Mar 19 23:19:38 2018
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple x86_64-w64-mingw32 %s -o - | FileCheck %s
+
+@glob = common dso_local local_unnamed_addr global x86_fp80 0xK00000000000000000000, align 16
+
+define dso_local void @call() {
+entry:
+  %0 = load x86_fp80, x86_fp80* @glob, align 16
+  %1 = tail call x86_fp80 @floorl(x86_fp80 %0)
+  store x86_fp80 %1, x86_fp80* @glob, align 16
+  ret void
+}
+
+declare x86_fp80 @floorl(x86_fp80)
+
+; CHECK-LABEL: call
+; CHECK: fldt glob(%rip)
+; CHECK: fstpt [[ARGOFF:[0-9]+]](%rsp)
+; CHECK: leaq [[RETOFF:[0-9]+]](%rsp), %rcx
+; CHECK: leaq [[ARGOFF]](%rsp), %rdx
+; CHECK: callq floorl
+; CHECK: fldt [[RETOFF]](%rsp)
+; CHECK: fstpt glob(%rip)