[llvm] 9d7d34c - [X86][MS] Fix the alignment mismatch of vector variable arguments on Win32

Tue Sep 7 18:27:01 PDT 2021

Author: Wang, Pengfei
Date: 2021-09-08T09:26:44+08:00
New Revision: 9d7d34c7691ad89cb0d7529e6a0064026acd6dca

URL: https://github.com/llvm/llvm-project/commit/9d7d34c7691ad89cb0d7529e6a0064026acd6dca
DIFF: https://github.com/llvm/llvm-project/commit/9d7d34c7691ad89cb0d7529e6a0064026acd6dca.diff

LOG: [X86][MS] Fix the alignment mismatch of vector variable arguments on Win32

On the callee side, vector variable arguments are 4-byte aligned, which
matches MSVC. The caller, however, aligns them to the size of the vector
argument. This mismatch results in runtime failures. This patch fixes the
problem by reducing the caller-side alignment of vector variable arguments
to 4 bytes on Win32.

Fixed vector arguments are passed by pointer on Win32, so they do not have
this problem.

I couldn't find documentation on MSDN for this calling convention, so I ran
several experiments here: https://godbolt.org/z/n1zn1Gx1z

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D108887

Added: 
    llvm/test/CodeGen/X86/vaargs-win32.ll

Modified: 
    llvm/lib/Target/X86/X86CallingConv.td
    llvm/test/CodeGen/X86/win32-spill-xmm.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 866483d64693a..63757ccc20a93 100644

--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -23,6 +23,13 @@ class CCIfNotSubtarget<string F, CCAction A>
                        "(State.getMachineFunction().getSubtarget()).", F),
            A>;
 
+/// CCIfIsVarArgOnWin - Match if isVarArg on Windows 32bits.
+class CCIfIsVarArgOnWin<CCAction A>
+    : CCIf<"State.isVarArg() && "
+           "State.getMachineFunction().getSubtarget().getTargetTriple()."
+           "isWindowsMSVCEnvironment()",
+           A>;
+
 // Register classes for RegCall
 class RC_X86_RegCall {
   list<Register> GPR_8 = [];
@@ -771,6 +778,22 @@ def CC_X86_32_Vector_Common : CallingConv<[
            CCAssignToStack<64, 64>>
 ]>;
 
+/// CC_X86_Win32_Vector - In X86 Win32 calling conventions, extra vector
+/// values are spilled on the stack.
+def CC_X86_Win32_Vector : CallingConv<[
+  // Other SSE vectors get 16-byte stack slots that are 4-byte aligned.
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+           CCAssignToStack<16, 4>>,
+
+  // 256-bit AVX vectors get 32-byte stack slots that are 4-byte aligned.
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+           CCAssignToStack<32, 4>>,
+
+  // 512-bit AVX 512-bit vectors get 64-byte stack slots that are 4-byte aligned.
+  CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+           CCAssignToStack<64, 4>>
+]>;
+
 // CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
 // vector registers
 def CC_X86_32_Vector_Standard : CallingConv<[
@@ -787,6 +810,7 @@ def CC_X86_32_Vector_Standard : CallingConv<[
   CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
                 CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,
 
+  CCIfIsVarArgOnWin<CCDelegateTo<CC_X86_Win32_Vector>>,
   CCDelegateTo<CC_X86_32_Vector_Common>
 ]>;
 

diff  --git a/llvm/test/CodeGen/X86/vaargs-win32.ll b/llvm/test/CodeGen/X86/vaargs-win32.ll
new file mode 100644
index 0000000000000..9cb40916e263e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vaargs-win32.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s --check-prefix=MSVC
+; RUN: llc -mcpu=generic -mtriple=i686-pc-mingw32 -mattr=+sse < %s | FileCheck %s --check-prefix=MINGW
+
+@a = external dso_local global <4 x float>, align 16
+
+define dso_local void @testPastArguments() nounwind {
+; MSVC-LABEL: testPastArguments:
+; MSVC:       # %bb.0: # %entry
+; MSVC-NEXT:    subl $20, %esp
+; MSVC-NEXT:    movaps _a, %xmm0
+; MSVC-NEXT:    movups %xmm0, 4(%esp)
+; MSVC-NEXT:    movl $1, (%esp)
+; MSVC-NEXT:    calll _testm128
+; MSVC-NEXT:    addl $20, %esp
+; MSVC-NEXT:    retl
+;
+; MINGW-LABEL: testPastArguments:
+; MINGW:       # %bb.0: # %entry
+; MINGW-NEXT:    pushl %ebp
+; MINGW-NEXT:    movl %esp, %ebp
+; MINGW-NEXT:    andl $-16, %esp
+; MINGW-NEXT:    subl $48, %esp
+; MINGW-NEXT:    movaps _a, %xmm0
+; MINGW-NEXT:    movaps %xmm0, 16(%esp)
+; MINGW-NEXT:    movl $1, (%esp)
+; MINGW-NEXT:    calll _testm128
+; MINGW-NEXT:    movl %ebp, %esp
+; MINGW-NEXT:    popl %ebp
+; MINGW-NEXT:    retl
+entry:
+  %0 = load <4 x float>, <4 x float>* @a, align 16
+  %call = tail call i32 (i32, ...) @testm128(i32 1, <4 x float> inreg %0)
+  ret void
+}
+
+declare i32 @testm128(i32, ...) nounwind

diff  --git a/llvm/test/CodeGen/X86/win32-spill-xmm.ll b/llvm/test/CodeGen/X86/win32-spill-xmm.ll
index c6b163b88b24d..0ab87a751e4bf 100644
--- a/llvm/test/CodeGen/X86/win32-spill-xmm.ll
+++ b/llvm/test/CodeGen/X86/win32-spill-xmm.ll
@@ -20,7 +20,7 @@ declare void @bar(<16 x float> %a, i32 %b)
 ; Check that proper alignment of spilled vector does not affect vargs
 
 ; CHECK-LABEL: vargs_not_affected
-; CHECK: movl 28(%ebp), %eax
+; CHECK: movl 28(%esp), %eax
 define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
 entry:
   %ap = alloca i8*, align 4