[llvm] r284108 - [X86] Basic additions to support RegCall Calling Convention.

Oren Ben Simhon via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 13 00:53:44 PDT 2016


Author: orenb
Date: Thu Oct 13 02:53:43 2016
New Revision: 284108

URL: http://llvm.org/viewvc/llvm-project?rev=284108&view=rev
Log:
[X86] Basic additions to support RegCall Calling Convention.

The Register Calling Convention (RegCall) was introduced by Intel to optimize parameter transfer on function calls.
This calling convention ensures that as many values as possible are passed or returned in registers.
This commit adds the basic LLVM CodeGen support needed for RegCall on X86.

Differential Revision: http://reviews.llvm.org/D25022
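
To make the addition concrete, here is a minimal, hypothetical IR sketch of the new convention in use (the function names are illustrative and not part of this commit):

; A callee and a call site both using the new x86_regcallcc keyword.
; Under RegCall, the i32 arguments and the return value are expected to
; travel in registers rather than on the stack.
define x86_regcallcc i32 @callee(i32 %a, i32 %b) {
  %sum = add i32 %a, %b
  ret i32 %sum
}

define i32 @caller() {
  ; The call site must name the same calling convention as the callee.
  %r = call x86_regcallcc i32 @callee(i32 1, i32 2)
  ret i32 %r
}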

Added:
    llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll
    llvm/trunk/test/CodeGen/X86/sse-regcall.ll
Modified:
    llvm/trunk/include/llvm/IR/CallingConv.h
    llvm/trunk/lib/AsmParser/LLLexer.cpp
    llvm/trunk/lib/AsmParser/LLParser.cpp
    llvm/trunk/lib/AsmParser/LLToken.h
    llvm/trunk/lib/IR/AsmWriter.cpp
    llvm/trunk/lib/Target/X86/X86CallingConv.h
    llvm/trunk/lib/Target/X86/X86CallingConv.td
    llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp

Modified: llvm/trunk/include/llvm/IR/CallingConv.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/CallingConv.h?rev=284108&r1=284107&r2=284108&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/CallingConv.h (original)
+++ llvm/trunk/include/llvm/IR/CallingConv.h Thu Oct 13 02:53:43 2016
@@ -193,6 +193,9 @@ namespace CallingConv {
     /// Calling convention for AMDGPU code object kernels.
     AMDGPU_KERNEL = 91,
 
+    /// Register calling convention used to optimize parameter transfer.
+    X86_RegCall = 92,
+
     /// The highest possible calling convention ID. Must be some 2^k - 1.
     MaxID = 1023
   };
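
Since the enumerator pins the convention to ID 92, textual IR can name it either by keyword or by number (assuming the generic "cc <n>" syntax); a small illustrative sketch:

; Both declarations request calling convention 92 (X86_RegCall).
declare x86_regcallcc void @f(i32)
declare cc 92 void @g(i32)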

Modified: llvm/trunk/lib/AsmParser/LLLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLLexer.cpp?rev=284108&r1=284107&r2=284108&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLLexer.cpp (original)
+++ llvm/trunk/lib/AsmParser/LLLexer.cpp Thu Oct 13 02:53:43 2016
@@ -585,6 +585,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(intel_ocl_bicc);
   KEYWORD(x86_64_sysvcc);
   KEYWORD(x86_64_win64cc);
+  KEYWORD(x86_regcallcc);
   KEYWORD(webkit_jscc);
   KEYWORD(swiftcc);
   KEYWORD(anyregcc);

Modified: llvm/trunk/lib/AsmParser/LLParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLParser.cpp?rev=284108&r1=284107&r2=284108&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLParser.cpp (original)
+++ llvm/trunk/lib/AsmParser/LLParser.cpp Thu Oct 13 02:53:43 2016
@@ -1695,6 +1695,7 @@ bool LLParser::ParseOptionalCallingConv(
   case lltok::kw_coldcc:         CC = CallingConv::Cold; break;
   case lltok::kw_x86_stdcallcc:  CC = CallingConv::X86_StdCall; break;
   case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+  case lltok::kw_x86_regcallcc:  CC = CallingConv::X86_RegCall; break;
   case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
   case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break;
   case lltok::kw_arm_apcscc:     CC = CallingConv::ARM_APCS; break;

Modified: llvm/trunk/lib/AsmParser/LLToken.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLToken.h?rev=284108&r1=284107&r2=284108&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLToken.h (original)
+++ llvm/trunk/lib/AsmParser/LLToken.h Thu Oct 13 02:53:43 2016
@@ -127,6 +127,7 @@ enum Kind {
   kw_x86_fastcallcc,
   kw_x86_thiscallcc,
   kw_x86_vectorcallcc,
+  kw_x86_regcallcc,
   kw_arm_apcscc,
   kw_arm_aapcscc,
   kw_arm_aapcs_vfpcc,

Modified: llvm/trunk/lib/IR/AsmWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AsmWriter.cpp?rev=284108&r1=284107&r2=284108&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AsmWriter.cpp (original)
+++ llvm/trunk/lib/IR/AsmWriter.cpp Thu Oct 13 02:53:43 2016
@@ -311,6 +311,7 @@ static void PrintCallingConv(unsigned cc
   case CallingConv::X86_StdCall:   Out << "x86_stdcallcc"; break;
   case CallingConv::X86_FastCall:  Out << "x86_fastcallcc"; break;
   case CallingConv::X86_ThisCall:  Out << "x86_thiscallcc"; break;
+  case CallingConv::X86_RegCall:   Out << "x86_regcallcc"; break;
   case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
   case CallingConv::Intel_OCL_BI:  Out << "intel_ocl_bicc"; break;
   case CallingConv::ARM_APCS:      Out << "arm_apcscc"; break;

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.h?rev=284108&r1=284107&r2=284108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.h (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.h Thu Oct 13 02:53:43 2016
@@ -43,6 +43,13 @@ inline bool CC_X86_AnyReg_Error(unsigned
   return false;
 }
 
+inline bool CC_X86_RegCall_Error(unsigned &, MVT &, MVT &,
+                                 CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+                                 CCState &) {
+  report_fatal_error("LLVM x86 RegCall calling convention implementation"
+                     " doesn't support long double and mask types yet.");
+}
+
 inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
                                          MVT &LocVT,
                                          CCValAssign::LocInfo &LocInfo,
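
For reference, IR along the following lines (a hypothetical mask-type argument) is routed to this new error handler by the .td rules below and aborts with the message above, rather than being lowered:

; Passing a v8i1 mask under RegCall currently hits CC_X86_RegCall_Error.
define x86_regcallcc <8 x i1> @mask_arg(<8 x i1> %m) {
  ret <8 x i1> %m
}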

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=284108&r1=284107&r2=284108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td Thu Oct 13 02:53:43 2016
@@ -18,6 +18,164 @@ class CCIfSubtarget<string F, CCAction A
                        "(State.getMachineFunction().getSubtarget()).", F),
            A>;
 
+// Register classes for RegCall
+class RC_X86_RegCall {
+  list<Register> GPR_8 = [];
+  list<Register> GPR_16 = [];
+  list<Register> GPR_32 = [];
+  list<Register> GPR_64 = [];
+  list<Register> XMM = [];
+  list<Register> YMM = [];
+  list<Register> ZMM = [];
+}
+
+// RegCall register classes for 32-bit targets
+def RC_X86_32_RegCall : RC_X86_RegCall {
+  let GPR_8 = [AL, CL, DL, DIL, SIL];
+  let GPR_16 = [AX, CX, DX, DI, SI];
+  let GPR_32 = [EAX, ECX, EDX, EDI, ESI];
+  let GPR_64 = [RAX]; ///< Not actually used, but AssignToReg can't handle []
+                      ///< \todo Fix AssignToReg to enable empty lists
+  let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
+  let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7];
+  let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7];
+}
+
+class RC_X86_64_RegCall : RC_X86_RegCall {
+  let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15];
+  let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+             YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15];
+  let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7,
+             ZMM8, ZMM9, ZMM10, ZMM11, ZMM12, ZMM13, ZMM14, ZMM15];
+}
+
+def RC_X86_64_RegCall_Win : RC_X86_64_RegCall {
+  let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R10B, R11B, R12B, R14B, R15B];
+  let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R10W, R11W, R12W, R14W, R15W];
+  let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R10D, R11D, R12D, R14D, R15D];
+  let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11, R12, R14, R15];
+}
+
+def RC_X86_64_RegCall_SysV : RC_X86_64_RegCall {
+  let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R12B, R13B, R14B, R15B];
+  let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R12W, R13W, R14W, R15W];
+  let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R12D, R13D, R14D, R15D];
+  let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R12, R13, R14, R15];
+}
+
+// Intel RegCall calling convention (32- and 64-bit variants).
+multiclass X86_RegCall_base<RC_X86_RegCall RC> {
+def CC_#NAME : CallingConv<[
+    // Handle byval parameters.
+    CCIfSubtarget<"is64Bit()", CCIfByVal<CCPassByVal<8, 8>>>,
+    CCIfByVal<CCPassByVal<4, 4>>,
+
+    // Promote i1/i8/i16 arguments to i32.
+    CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+    // bool, char, int, enum, long, pointer --> GPR
+    CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
+
+    // TODO: Handle the case of mask types (v*i1)
+    // TODO: Handle the case of 32 bit machine with v64i1 argument 
+    //       (split to 2 registers)
+    CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
+
+    // long long, __int64 --> GPR
+    CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,
+
+    // TODO: Handle the case of long double (f80)
+    CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>,
+
+    // float, double, float128 --> XMM
+    // In the case of SSE disabled --> save to stack
+    CCIfType<[f32, f64, f128], 
+      CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+    // __m128, __m128i, __m128d --> XMM
+    // In the case of SSE disabled --> save to stack
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 
+      CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+    // __m256, __m256i, __m256d --> YMM
+    // In the case of AVX disabled --> save to stack
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+      CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
+
+    // __m512, __m512i, __m512d --> ZMM
+    // In the case of AVX512 disabled --> save to stack
+    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+      CCIfSubtarget<"hasAVX512()", CCAssignToReg<RC.ZMM>>>,
+
+    // If no register was found -> assign to stack
+
+    // In 64 bit, assign 64/32 bit values to 8 byte stack
+    CCIfSubtarget<"is64Bit()", CCIfType<[i32, i64, f32, f64], 
+      CCAssignToStack<8, 8>>>,
+
+    // In 32 bit, assign 64/32 bit values to 8/4 byte stack
+    CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+    CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+    // MMX types get an 8-byte stack slot; alignment depends on the target.
+    CCIfSubtarget<"is64Bit()", CCIfType<[x86mmx], CCAssignToStack<8, 8>>>,
+    CCIfType<[x86mmx], CCAssignToStack<8, 4>>,
+
+    // f128 values get stack slots whose size and alignment depend
+    // on the subtarget.
+    CCIfType<[f128], CCAssignToStack<0, 0>>,
+
+    // Vectors get 16-byte stack slots that are 16-byte aligned.
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 
+      CCAssignToStack<16, 16>>,
+
+    // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], 
+      CCAssignToStack<32, 32>>,
+
+    // 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
+    CCIfType<[v16i32, v8i64, v16f32, v8f64], CCAssignToStack<64, 64>>
+]>;
+
+def RetCC_#NAME : CallingConv<[
+    // Promote i1 return values to i8.
+    CCIfType<[i1], CCPromoteToType<i8>>,
+
+    // bool, char, int, enum, long, pointer --> GPR
+    CCIfType<[i8], CCAssignToReg<RC.GPR_8>>,
+    CCIfType<[i16], CCAssignToReg<RC.GPR_16>>,
+    CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
+
+    // TODO: Handle the case of mask types (v*i1)
+    // TODO: Handle the case of 32 bit machine with v64i1 argument
+    //       (split to 2 registers)
+    CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
+
+    // long long, __int64 --> GPR
+    CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,
+
+    // long double --> FP
+    CCIfType<[f80], CCAssignToReg<[FP0]>>,
+
+    // float, double, float128 --> XMM
+    CCIfType<[f32, f64, f128], 
+      CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+    // __m128, __m128i, __m128d --> XMM
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 
+      CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+    // __m256, __m256i, __m256d --> YMM
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], 
+      CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
+
+    // __m512, __m512i, __m512d --> ZMM
+    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], 
+      CCIfSubtarget<"hasAVX512()", CCAssignToReg<RC.ZMM>>>
+]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Return Value Calling Conventions
 //===----------------------------------------------------------------------===//
@@ -237,6 +395,14 @@ def RetCC_X86_64_HHVM: CallingConv<[
                                  RAX, R10, R11, R13, R14, R15]>>
 ]>;
 
+
+defm X86_32_RegCall :
+     X86_RegCall_base<RC_X86_32_RegCall>;
+defm X86_Win64_RegCall :
+     X86_RegCall_base<RC_X86_64_RegCall_Win>;
+defm X86_SysV64_RegCall :
+     X86_RegCall_base<RC_X86_64_RegCall_SysV>;
+
 // This is the root return-value convention for the X86-32 backend.
 def RetCC_X86_32 : CallingConv<[
   // If FastCC, use RetCC_X86_32_Fast.
@@ -244,6 +410,7 @@ def RetCC_X86_32 : CallingConv<[
   // If HiPE, use RetCC_X86_32_HiPE.
   CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
   CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
+  CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_32_RegCall>>,
 
   // Otherwise, use RetCC_X86_32_C.
   CCDelegateTo<RetCC_X86_32_C>
@@ -268,6 +435,11 @@ def RetCC_X86_64 : CallingConv<[
   // Handle HHVM calls.
   CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
 
+  CCIfCC<"CallingConv::X86_RegCall",
+          CCIfSubtarget<"isTargetWin64()",
+                        CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
+  CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_SysV64_RegCall>>,
+
   // Mingw64 and native Win64 use Win64 CC
   CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
 
@@ -817,6 +989,7 @@ def CC_X86_32 : CallingConv<[
   CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
   CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
   CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+  CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
 
   // Otherwise, drop to normal X86-32 CC
   CCDelegateTo<CC_X86_32_C>
@@ -833,6 +1006,9 @@ def CC_X86_64 : CallingConv<[
   CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
   CCIfCC<"CallingConv::HHVM", CCDelegateTo<CC_X86_64_HHVM>>,
   CCIfCC<"CallingConv::HHVM_C", CCDelegateTo<CC_X86_64_HHVM_C>>,
+  CCIfCC<"CallingConv::X86_RegCall",
+    CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
+  CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
   CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
 
   // Mingw64 and native Win64 use Win64 CC
@@ -936,3 +1112,17 @@ def CSR_64_Intel_OCL_BI_AVX512 : CalleeS
 
 // Only R12 is preserved for PHP calls in HHVM.
 def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
+
+// The register calling convention preserves a few GPRs and some XMM
+// registers (XMM4-7 in 32-bit mode, XMM8-15 in 64-bit mode).
+def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP, ESP)>;
+def CSR_32_RegCall       : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE,
+                                           (sequence "XMM%u", 4, 7))>;
+def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+                                              (sequence "R%u", 10, 15))>;
+def CSR_Win64_RegCall       : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
+                                              (sequence "XMM%u", 8, 15))>;
+def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+                                               (sequence "R%u", 12, 15))>;
+def CSR_SysV64_RegCall       : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
+                                               (sequence "XMM%u", 8, 15))>;
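
Given the RC_X86_32_RegCall lists above, a rough sketch of what the 32-bit rules imply for a simple signature (the register assignments here are inferred from the tables, not taken from this commit's tests):

; Under CC_X86_32_RegCall, i1/i8/i16 arguments are promoted to i32, the
; i32-class arguments land in EAX, ECX, EDX, EDI, ESI in order, and
; f32/f64 go to XMM0-XMM7 when SSE is available.
define x86_regcallcc i32 @sketch(i8 %c, i32 %n) {
  ; %c is promoted to i32 (expected in EAX) and %n follows in ECX;
  ; the i32 result is returned in EAX.
  %w = zext i8 %c to i32
  %s = add i32 %w, %n
  ret i32 %s
}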

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=284108&r1=284107&r2=284108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Thu Oct 13 02:53:43 2016
@@ -305,6 +305,19 @@ X86RegisterInfo::getCalleeSavedRegs(cons
   }
   case CallingConv::HHVM:
     return CSR_64_HHVM_SaveList;
+  case CallingConv::X86_RegCall:
+    if (Is64Bit) {
+      if (IsWin64) {
+        return (HasSSE ? CSR_Win64_RegCall_SaveList : 
+                         CSR_Win64_RegCall_NoSSE_SaveList);
+      } else {
+        return (HasSSE ? CSR_SysV64_RegCall_SaveList : 
+                         CSR_SysV64_RegCall_NoSSE_SaveList);
+      }
+    } else {
+      return (HasSSE ? CSR_32_RegCall_SaveList : 
+                       CSR_32_RegCall_NoSSE_SaveList);
+    }
   case CallingConv::Cold:
     if (Is64Bit)
       return CSR_64_MostRegs_SaveList;
@@ -406,6 +419,19 @@ X86RegisterInfo::getCallPreservedMask(co
   }
   case CallingConv::HHVM:
     return CSR_64_HHVM_RegMask;
+  case CallingConv::X86_RegCall:
+    if (Is64Bit) {
+      if (IsWin64) { 
+        return (HasSSE ? CSR_Win64_RegCall_RegMask : 
+                         CSR_Win64_RegCall_NoSSE_RegMask);
+      } else {
+        return (HasSSE ? CSR_SysV64_RegCall_RegMask : 
+                         CSR_SysV64_RegCall_NoSSE_RegMask);
+      }
+    } else {
+      return (HasSSE ? CSR_32_RegCall_RegMask : 
+                       CSR_32_RegCall_NoSSE_RegMask);
+    }
   case CallingConv::Cold:
     if (Is64Bit)
       return CSR_64_MostRegs_RegMask;
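
The save lists and register masks above mean that a RegCall callee preserves, for example, ESI, EDI, EBX, EBP and XMM4-7 in 32-bit mode, so callers may keep values live in those registers across calls; a hypothetical sketch:

declare x86_regcallcc void @helper()

define x86_regcallcc i32 @keep_live(i32 %v) {
  ; Because @helper uses RegCall, %v can stay in a callee-saved register
  ; (e.g. ESI) across the call instead of being spilled to the stack.
  call x86_regcallcc void @helper()
  ret i32 %v
}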

Added: llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll?rev=284108&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll Thu Oct 13 02:53:43 2016
@@ -0,0 +1,607 @@
+; RUN: llc < %s -mtriple=i386-pc-win32       -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq  | FileCheck --check-prefix=X32 %s
+; RUN: llc < %s -mtriple=x86_64-win32        -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq  | FileCheck --check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu    -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq  | FileCheck --check-prefix=LINUXOSX64 %s 
+
+; X32-LABEL:  test_argReti1:
+; X32:        kmov{{.*}}  %eax, %k{{[0-7]}}
+; X32:        kmov{{.*}}  %k{{[0-7]}}, %eax
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argReti1:
+; WIN64:        kmov{{.*}}  %eax, %k{{[0-7]}}
+; WIN64:        kmov{{.*}}  %k{{[0-7]}}, %eax
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning i1
+define x86_regcallcc i1 @test_argReti1(i1 %a)  {
+  %add = add i1 %a, 1
+  ret i1 %add
+}
+
+; X32-LABEL:  test_CallargReti1:
+; X32:        kmov{{.*}}  %k{{[0-7]}}, %eax
+; X32:        call{{.*}}   {{.*}}test_argReti1
+; X32:        kmov{{.*}}  %eax, %k{{[0-7]}}
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargReti1:
+; WIN64:        kmov{{.*}}  %k{{[0-7]}}, %eax
+; WIN64:        call{{.*}}   {{.*}}test_argReti1
+; WIN64:        kmov{{.*}}  %eax, %k{{[0-7]}}
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving i1
+define x86_regcallcc i1 @test_CallargReti1(i1 %a)  {
+  %b = add i1 %a, 1
+  %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
+  %d = add i1 %c, 1
+  ret i1 %d
+}
+
+; X32-LABEL:  test_argReti8:
+; X32:        incb  %al
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argReti8:
+; WIN64:        incb %al
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning i8
+define x86_regcallcc i8 @test_argReti8(i8 %a)  {
+  %add = add i8 %a, 1
+  ret i8 %add
+}
+
+; X32-LABEL:  test_CallargReti8:
+; X32:        incb %al
+; X32:        call{{.*}}   {{.*}}test_argReti8
+; X32:        incb %al
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargReti8:
+; WIN64:        incb %al
+; WIN64:        call{{.*}}   {{.*}}test_argReti8
+; WIN64:        incb %al
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving i8
+define x86_regcallcc i8 @test_CallargReti8(i8 %a)  {
+  %b = add i8 %a, 1
+  %c = call x86_regcallcc i8 @test_argReti8(i8 %b)
+  %d = add i8 %c, 1
+  ret i8 %d
+}
+
+; X32-LABEL:  test_argReti16:
+; X32:        incl %eax
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argReti16:
+; WIN64:        incl %eax
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning i16
+define x86_regcallcc i16 @test_argReti16(i16 %a)  {
+  %add = add i16 %a, 1
+  ret i16 %add
+}
+
+; X32-LABEL:  test_CallargReti16:
+; X32:        incl %eax
+; X32:        call{{.*}}   {{.*}}test_argReti16
+; X32:        incl %eax
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargReti16:
+; WIN64:        incl %eax
+; WIN64:        call{{.*}}   {{.*}}test_argReti16
+; WIN64:        incl %eax
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving i16
+define x86_regcallcc i16 @test_CallargReti16(i16 %a)  {
+  %b = add i16 %a, 1
+  %c = call x86_regcallcc i16 @test_argReti16(i16 %b)
+  %d = add i16 %c, 1
+  ret i16 %d
+}
+
+; X32-LABEL:  test_argReti32:
+; X32:        incl %eax
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argReti32:
+; WIN64:        incl %eax
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning i32
+define x86_regcallcc i32 @test_argReti32(i32 %a)  {
+  %add = add i32 %a, 1
+  ret i32 %add
+}
+
+; X32-LABEL:  test_CallargReti32:
+; X32:        incl %eax
+; X32:        call{{.*}}   {{.*}}test_argReti32
+; X32:        incl %eax
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargReti32:
+; WIN64:        incl %eax
+; WIN64:        call{{.*}}   {{.*}}test_argReti32
+; WIN64:        incl %eax
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving i32
+define x86_regcallcc i32 @test_CallargReti32(i32 %a)  {
+  %b = add i32 %a, 1
+  %c = call x86_regcallcc i32 @test_argReti32(i32 %b)
+  %d = add i32 %c, 1
+  ret i32 %d
+}
+
+; X32-LABEL:  test_argReti64:
+; X32:        addl $3, %eax
+; X32:        adcl $1, %ecx
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argReti64:
+; WIN64:        movabsq $4294967299, %r{{.*}}
+; WIN64:        addq %r{{.*}}, %rax
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning i64
+define x86_regcallcc i64 @test_argReti64(i64 %a)  {
+  %add = add i64 %a, 4294967299
+  ret i64 %add
+}
+
+; X32-LABEL:  test_CallargReti64:
+; X32:        add{{.*}}  $1, %eax
+; X32:        adcl   $0, {{%e(cx|dx|si|di|bx|bp)}}
+; X32:        call{{.*}}   {{.*}}test_argReti64
+; X32:        add{{.*}}  $1, %eax
+; X32:        adcl   $0, {{%e(cx|dx|si|di|bx|bp)}}
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargReti64:
+; WIN64:        incq %rax
+; WIN64:        call{{.*}}   {{.*}}test_argReti64
+; WIN64:        incq %rax
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving i64
+define x86_regcallcc i64 @test_CallargReti64(i64 %a)  {
+  %b = add i64 %a, 1
+  %c = call x86_regcallcc i64 @test_argReti64(i64 %b)
+  %d = add i64 %c, 1
+  ret i64 %d
+}
+
+; X32-LABEL:  test_argRetFloat:
+; X32:        vadd{{.*}}  {{.*}}, %xmm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argRetFloat:
+; WIN64:        vadd{{.*}}  {{.*}}, %xmm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning float
+define x86_regcallcc float @test_argRetFloat(float %a)  {
+  %add = fadd float 1.0, %a
+  ret float %add
+}
+
+; X32-LABEL:  test_CallargRetFloat:
+; X32:        vadd{{.*}}  {{%xmm([0-7])}}, %xmm0, %xmm0
+; X32:        call{{.*}}   {{.*}}test_argRetFloat
+; X32:        vadd{{.*}}  {{%xmm([0-7])}}, %xmm0, %xmm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargRetFloat:
+; WIN64:        vadd{{.*}}  {{%xmm([0-9]+)}}, %xmm0, %xmm0
+; WIN64:        call{{.*}}   {{.*}}test_argRetFloat
+; WIN64:        vadd{{.*}}  {{%xmm([0-9]+)}}, %xmm0, %xmm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving float
+define x86_regcallcc float @test_CallargRetFloat(float %a)  {
+  %b = fadd float 1.0, %a
+  %c = call x86_regcallcc float @test_argRetFloat(float %b)
+  %d = fadd float 1.0, %c
+  ret float %d
+}
+
+; X32-LABEL:  test_argRetDouble:
+; X32:        vadd{{.*}}  {{.*}}, %xmm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argRetDouble:
+; WIN64:        vadd{{.*}}  {{.*}}, %xmm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning double
+define x86_regcallcc double @test_argRetDouble(double %a)  {
+  %add = fadd double %a, 1.0
+  ret double %add
+}
+
+; X32-LABEL:  test_CallargRetDouble:
+; X32:        vadd{{.*}}  {{%xmm([0-7])}}, %xmm0, %xmm0
+; X32:        call{{.*}}   {{.*}}test_argRetDouble
+; X32:        vadd{{.*}}  {{%xmm([0-7])}}, %xmm0, %xmm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargRetDouble:
+; WIN64:        vadd{{.*}}  {{%xmm([0-9]+)}}, %xmm0, %xmm0
+; WIN64:        call{{.*}}   {{.*}}test_argRetDouble
+; WIN64:        vadd{{.*}}  {{%xmm([0-9]+)}}, %xmm0, %xmm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving double
+define x86_regcallcc double @test_CallargRetDouble(double %a)  {
+  %b = fadd double 1.0, %a
+  %c = call x86_regcallcc double @test_argRetDouble(double %b)
+  %d = fadd double 1.0, %c
+  ret double %d
+}
+
+; X32-LABEL:  test_argRetPointer:
+; X32:        incl %eax
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argRetPointer:
+; WIN64:        incl %eax
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning pointer
+define x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a)  {
+  %b = ptrtoint [4 x i32]* %a to i32
+  %c = add i32 %b, 1
+  %d = inttoptr i32 %c to [4 x i32]*
+  ret [4 x i32]* %d
+}
+
+; X32-LABEL:  test_CallargRetPointer:
+; X32:        incl %eax
+; X32:        call{{.*}}   {{.*}}test_argRetPointer
+; X32:        incl %eax
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargRetPointer:
+; WIN64:        incl %eax
+; WIN64:        call{{.*}}   {{.*}}test_argRetPointer
+; WIN64:        incl %eax
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving pointer
+define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a)  {
+  %b = ptrtoint [4 x i32]* %a to i32
+  %c = add i32 %b, 1
+  %d = inttoptr i32 %c to [4 x i32]*
+  %e = call x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %d)
+  %f = ptrtoint [4 x i32]* %e to i32
+  %g = add i32 %f, 1
+  %h = inttoptr i32 %g to [4 x i32]*
+  ret [4 x i32]* %h
+}
+
+; X32-LABEL:  test_argRet128Vector:
+; X32:        vpblend{{.*}}  %xmm0, %xmm1, %xmm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argRet128Vector:
+; WIN64:        vpblend{{.*}}  %xmm0, %xmm1, %xmm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning 128 bit vector
+define x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %b)  {
+  %d = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %d
+}
+
+; X32-LABEL:  test_CallargRet128Vector:
+; X32:        vmov{{.*}}  %xmm0, {{%xmm([0-7])}}
+; X32:        call{{.*}}   {{.*}}test_argRet128Vector
+; X32:        vpblend{{.*}}  {{%xmm([0-7])}}, %xmm0, %xmm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargRet128Vector:
+; WIN64:        vmov{{.*}}  %xmm0, {{%xmm([0-9]+)}}
+; WIN64:        call{{.*}}   {{.*}}test_argRet128Vector
+; WIN64:        vpblend{{.*}}  {{%xmm([0-9]+)}}, %xmm0, %xmm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving 128 bit vector
+define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a)  {
+  %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %a)
+  %c = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %c
+}
+
+; X32-LABEL:  test_argRet256Vector:
+; X32:        vpblend{{.*}}  %ymm0, %ymm1, %ymm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argRet256Vector:
+; WIN64:        vpblend{{.*}}  %ymm0, %ymm1, %ymm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning 256 bit vector
+define x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %b)  {
+  %d = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b
+  ret <8 x i32> %d
+}
+
+; X32-LABEL:  test_CallargRet256Vector:
+; X32:        vmov{{.*}}  %ymm0, %ymm1
+; X32:        call{{.*}}   {{.*}}test_argRet256Vector
+; X32:        vpblend{{.*}}  %ymm1, %ymm0, %ymm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargRet256Vector:
+; WIN64:        vmov{{.*}}  %ymm0, %ymm1
+; WIN64:        call{{.*}}   {{.*}}test_argRet256Vector
+; WIN64:        vpblend{{.*}}  %ymm1, %ymm0, %ymm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving 256 bit vector
+define x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i32> %a)  {
+  %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %a)
+  %c = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b
+  ret <8 x i32> %c
+}
+
+; X32-LABEL:  test_argRet512Vector:
+; X32:        vpblend{{.*}}  %zmm0, %zmm1, %zmm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argRet512Vector:
+; WIN64:        vpblend{{.*}}  %zmm0, %zmm1, %zmm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning 512 bit vector
+define x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %b)  {
+  %d = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b
+  ret <16 x i32> %d
+}
+
+; X32-LABEL:  test_CallargRet512Vector:
+; X32:        vmov{{.*}}  %zmm0, %zmm1
+; X32:        call{{.*}}   {{.*}}test_argRet512Vector
+; X32:        vpblend{{.*}}  %zmm1, %zmm0, %zmm0
+; X32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargRet512Vector:
+; WIN64:        vmov{{.*}}  %zmm0, %zmm1
+; WIN64:        call{{.*}}   {{.*}}test_argRet512Vector
+; WIN64:        vpblend{{.*}}  %zmm1, %zmm0, %zmm0
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving 512 bit vector
+define x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i32> %a)  {
+  %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %a)
+  %c = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b
+  ret <16 x i32> %c
+}
+
+; WIN64-LABEL: testf32_inp
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; WIN64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; WIN64: retq
+
+; X32-LABEL: testf32_inp
+; X32: vmovups {{%xmm([0-7])}}, {{.*(%esp).*}}  {{#+}} 16-byte Spill
+; X32: vmovups {{%xmm([0-7])}}, {{.*(%esp).*}}  {{#+}} 16-byte Spill
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}
+; X32: {{.*}} {{%zmm[0-7]}}, {{%zmm[0-7]}}, {{%zmm[0-7]}}
+; X32: vmovups {{.*(%esp).*}}, {{%xmm([0-7])}}  {{#+}} 16-byte Reload
+; X32: vmovups {{.*(%esp).*}}, {{%xmm([0-7])}}  {{#+}} 16-byte Reload
+; X32: retl
+
+; LINUXOSX64-LABEL: testf32_inp
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; LINUXOSX64: {{.*}} {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}, {{%zmm([0-9]|1[0-1])}}
+; LINUXOSX64: retq
+
+; Test regcall with multiple input parameters - callee-saved XMMs
+define x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
+  %x1 = fadd <32 x float> %a, %b
+  %x2 = fmul <32 x float> %a, %b
+  %x3 = fsub <32 x float> %x1, %x2
+  %x4 = fadd <32 x float> %x3, %c
+  ret <32 x float> %x4
+}
+
+; X32-LABEL: testi32_inp
+; X32: pushl {{%e(si|di|bx|bp)}}
+; X32: pushl {{%e(si|di|bx|bp)}}
+; X32: pushl {{%e(si|di|bx|bp)}}
+; X32: pushl {{%e(si|di|bx|bp)}}
+; X32: popl {{%e(si|di|bx|bp)}}
+; X32: popl {{%e(si|di|bx|bp)}}
+; X32: popl {{%e(si|di|bx|bp)}}
+; X32: popl {{%e(si|di|bx|bp)}}
+; X32: retl
+
+; WIN64-LABEL: testi32_inp
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: retq
+
+; LINUXOSX64-LABEL: testi32_inp
+; LINUXOSX64: pushq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX64: pushq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX64: pushq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX64: popq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX64: popq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX64: popq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX64: retq
+
+; Test regcall with multiple input parameters - callee-saved GPRs
+define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,
+                                      i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
+  %x1 = sub i32 %a1, %a2
+  %x2 = sub i32 %a3, %a4
+  %x3 = sub i32 %a5, %a6
+  %y1 = sub i32 %b1, %b2
+  %y2 = sub i32 %b3, %b4
+  %y3 = sub i32 %b5, %b6
+  %v1 = add i32 %a1, %a2
+  %v2 = add i32 %a3, %a4
+  %v3 = add i32 %a5, %a6
+  %w1 = add i32 %b1, %b2
+  %w2 = add i32 %b3, %b4
+  %w3 = add i32 %b5, %b6
+  %s1 = mul i32 %x1, %y1
+  %s2 = mul i32 %x2, %y2
+  %s3 = mul i32 %x3, %y3
+  %t1 = mul i32 %v1, %w1
+  %t2 = mul i32 %v2, %w2
+  %t3 = mul i32 %v3, %w3
+  %m1 = add i32 %s1, %s2
+  %m2 = add i32 %m1, %s3
+  %n1 = add i32 %t1, %t2
+  %n2 = add i32 %n1, %t3
+  %r1 = add i32 %m2, %n2
+  ret i32 %r1
+}
+
+; X32-LABEL: testf32_stack
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{%zmm([0-7])}}, {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-7])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-1])}}
+; X32: vaddps {{([0-9])+}}(%ebp), {{%zmm([0-7])}}, {{%zmm([0-1])}}
+; X32: retl
+
+; LINUXOSX64-LABEL: testf32_stack
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}, {{%zmm([0-9]+)}}
+; LINUXOSX64: vaddps {{([0-9])+}}(%rbp), {{%zmm([0-9]+)}}, {{%zmm([0-1])}}
+; LINUXOSX64: vaddps {{([0-9])+}}(%rbp), {{%zmm([0-9]+)}}, {{%zmm([0-1])}}
+; LINUXOSX64: retq
+
+; Test that parameters overflowing register capacity are passed on the stack
+define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, 
+                                               <32 x float> %a1, <32 x float> %b1, <32 x float> %c1,
+                                               <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind {
+  %x1 = fadd <32 x float> %a0, %b0
+  %x2 = fadd <32 x float> %c0, %x1
+  %x3 = fadd <32 x float> %a1, %x2
+  %x4 = fadd <32 x float> %b1, %x3
+  %x5 = fadd <32 x float> %c1, %x4
+  %x6 = fadd <32 x float> %a2, %x5
+  %x7 = fadd <32 x float> %b2, %x6
+  %x8 = fadd <32 x float> %c2, %x7
+  ret <32 x float> %x8
+}
+
+; X32-LABEL: test_argRetMixTypes
+; X32:       vmovd   %edx, {{%xmm([0-9])}}
+; X32:       vcvtsi2sdl      %eax, {{%xmm([0-9])}}, {{%xmm([0-9])}}
+; X32:       vcvtsi2sdl      %ecx, {{%xmm([0-9])}}, {{%xmm([0-9])}}
+; X32:       vcvtsi2sdl      %esi, {{%xmm([0-9])}}, {{%xmm([0-9])}}
+; X32:       vaddsd  %xmm1, %xmm0, %xmm0
+; X32:       vcvttsd2si      %xmm0, %eax
+; X32:       retl
+
+; LINUXOSX64-LABEL: test_argRetMixTypes
+; LINUXOSX64:       vcvtss2sd       %xmm1, %xmm1, %xmm1
+; LINUXOSX64:       vcvtsi2sdl      %eax, {{%xmm([0-9])}}, {{%xmm([0-9])}}
+; LINUXOSX64:       vcvtsi2sdl      %ecx, {{%xmm([0-9])}}, {{%xmm([0-9])}}
+; LINUXOSX64:       vcvtsi2sdq      %rdx, {{%xmm([0-9])}}, {{%xmm([0-9])}}
+; LINUXOSX64:       vcvtsi2sdl      %edi, {{%xmm([0-9])}}, {{%xmm([0-9])}}
+; LINUXOSX64:       vcvtsi2sdl      (%rsi), {{%xmm([0-9])}}, {{%xmm([0-9])}}
+; LINUXOSX64:       vcvttsd2si      {{%xmm([0-9])}}, %eax
+
+; Test regcall when passing/retrieving mixed types
+define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 {
+  %8 = fpext float %1 to double
+  %9 = fadd double %8, %0
+  %10 = sitofp i8 %2 to double
+  %11 = fadd double %9, %10
+  %12 = sitofp i32 %3 to double
+  %13 = fadd double %11, %12
+  %14 = sitofp i64 %4 to double
+  %15 = fadd double %13, %14
+  %16 = sitofp i16 %5 to double
+  %17 = fadd double %15, %16
+  %18 = load i32, i32* %6, align 4
+  %19 = sitofp i32 %18 to double
+  %20 = fadd double %17, %19
+  %21 = fptosi double %20 to i32
+  ret i32 %21
+}
+
+%struct.complex = type { float, double, i32, i8, i64}
+
+
+; X32-LABEL: test_argMultiRet    
+; X32:       vaddsd {{.*}}, %xmm1, %xmm1
+; X32:       movl    $4, %eax
+; X32:       movb    $7, %cl
+; X32:       movl    $999, %edx
+; X32:       xorl    %edi, %edi
+; X32:       retl
+
+; LINUXOSX64-LABEL: test_argMultiRet 
+; LINUXOSX64:       vaddsd  {{.*}}, %xmm1, %xmm1
+; LINUXOSX64:       movl    $4, %eax
+; LINUXOSX64:       movb    $7, %cl
+; LINUXOSX64:       movl    $999, %edx
+; LINUXOSX64:       retq
+        
+define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i64) local_unnamed_addr #0 {
+  %6 = fadd double %1, 5.000000e+00
+  %7 = insertvalue %struct.complex undef, float %0, 0
+  %8 = insertvalue %struct.complex %7, double %6, 1
+  %9 = insertvalue %struct.complex %8, i32 4, 2
+  %10 = insertvalue %struct.complex %9, i8 7, 3
+  %11 = insertvalue %struct.complex %10, i64 999, 4
+  ret %struct.complex %11
+}
+

Added: llvm/trunk/test/CodeGen/X86/sse-regcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-regcall.ll?rev=284108&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-regcall.ll (added)
+++ llvm/trunk/test/CodeGen/X86/sse-regcall.ll Thu Oct 13 02:53:43 2016
@@ -0,0 +1,207 @@
+; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s
+
+; WIN32-LABEL:  test_argReti1:
+; WIN32:        incb  %al
+; WIN32:        ret{{.*}}
+
+; WIN64-LABEL:  test_argReti1:
+; WIN64:        incb  %al
+; WIN64:        ret{{.*}}
+
+; Test regcall when receiving/returning i1
+define x86_regcallcc i1 @test_argReti1(i1 %a)  {
+  %add = add i1 %a, 1
+  ret i1 %add
+}
+
+; WIN32-LABEL:  test_CallargReti1:
+; WIN32:        movzbl  %al, %eax
+; WIN32:        call{{.*}}   {{.*}}test_argReti1
+; WIN32:        incb  %al
+; WIN32:        ret{{.*}}
+
+; WIN64-LABEL:  test_CallargReti1:
+; WIN64:        movzbl  %al, %eax
+; WIN64:        call{{.*}}   {{.*}}test_argReti1
+; WIN64:        incb  %al
+; WIN64:        ret{{.*}}
+
+; Test regcall when passing/retrieving i1
+define x86_regcallcc i1 @test_CallargReti1(i1 %a)  {
+  %b = add i1 %a, 1
+  %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
+  %d = add i1 %c, 1
+  ret i1 %d
+}
+
+; WIN64-LABEL: testf32_inp
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; WIN64: retq
+
+; WIN32-LABEL: testf32_inp
+; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}}  {{#+}} 16-byte Spill
+; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}}  {{#+}} 16-byte Spill
+; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}}  {{#+}} 16-byte Spill
+; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}}  {{#+}} 16-byte Spill
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
+; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
+; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}}  {{#+}} 16-byte Reload
+; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}}  {{#+}} 16-byte Reload
+; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}}  {{#+}} 16-byte Reload
+; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}}  {{#+}} 16-byte Reload
+; WIN32: retl
+
+; LINUXOSX-LABEL: testf32_inp
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
+; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; LINUXOSX: retq
+
+; Test calling conventions - input parameters, callee-saved XMMs
+define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind {
+  %x1 = fadd <16 x float> %a, %b
+  %x2 = fmul <16 x float> %a, %b
+  %x3 = fsub <16 x float> %x1, %x2
+  %x4 = fadd <16 x float> %x3, %c
+  ret <16 x float> %x4
+}
+
+; WIN32-LABEL: testi32_inp
+; WIN32: pushl {{%e(si|di|bx|bp)}}
+; WIN32: pushl {{%e(si|di|bx|bp)}}
+; WIN32: pushl {{%e(si|di|bx|bp)}}
+; WIN32: pushl {{%e(si|di|bx|bp)}}
+; WIN32: popl {{%e(si|di|bx|bp)}}
+; WIN32: popl {{%e(si|di|bx|bp)}}
+; WIN32: popl {{%e(si|di|bx|bp)}}
+; WIN32: popl {{%e(si|di|bx|bp)}}
+; WIN32: retl
+
+; WIN64-LABEL: testi32_inp
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: popq	{{%r(bp|bx|1[0-5])}}
+; WIN64: retq
+
+; LINUXOSX-LABEL: testi32_inp
+; LINUXOSX: pushq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX: pushq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX: pushq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX: pushq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX: popq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX: popq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX: popq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX: popq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX: retq
+
+; Test calling conventions - input parameters, callee-saved GPRs
+define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,
+                                      i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
+  %x1 = sub i32 %a1, %a2
+  %x2 = sub i32 %a3, %a4
+  %x3 = sub i32 %a5, %a6
+  %y1 = sub i32 %b1, %b2
+  %y2 = sub i32 %b3, %b4
+  %y3 = sub i32 %b5, %b6
+  %v1 = add i32 %a1, %a2
+  %v2 = add i32 %a3, %a4
+  %v3 = add i32 %a5, %a6
+  %w1 = add i32 %b1, %b2
+  %w2 = add i32 %b3, %b4
+  %w3 = add i32 %b5, %b6
+  %s1 = mul i32 %x1, %y1
+  %s2 = mul i32 %x2, %y2
+  %s3 = mul i32 %x3, %y3
+  %t1 = mul i32 %v1, %w1
+  %t2 = mul i32 %v2, %w2
+  %t3 = mul i32 %v3, %w3
+  %m1 = add i32 %s1, %s2
+  %m2 = add i32 %m1, %s3
+  %n1 = add i32 %t1, %t2
+  %n2 = add i32 %n1, %t3
+  %r1 = add i32 %m2, %n2
+  ret i32 %r1
+}
+
+; WIN32-LABEL: testf32_stack
+; WIN32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
+; WIN32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
+; WIN32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
+; WIN32: movaps {{%xmm([0-7])}}, {{(-*[0-9])+}}(%ebp)
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: addps {{([0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
+; WIN32: movaps {{(-*[0-9])+}}(%ebp), {{%xmm([0-7])}}
+
+; LINUXOSX-LABEL: testf32_stack
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{%xmm([0-9]+)}}, {{%xmm([0-9]+)}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: addps {{([0-9])+}}(%rsp), {{%xmm([0-7])}}
+; LINUXOSX: retq
+
+; Test that parameters overflowing register capacity are passed on the stack
+define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
+  %x1 = fadd <32 x float> %a, %b
+  %x2 = fadd <32 x float> %x1, %c
+  ret <32 x float> %x2
+}



