[llvm] r220745 - X86: Implement the vectorcall calling convention

Reid Kleckner reid at kleckner.net
Mon Oct 27 18:29:27 PDT 2014


Author: rnk
Date: Mon Oct 27 20:29:26 2014
New Revision: 220745

URL: http://llvm.org/viewvc/llvm-project?rev=220745&view=rev
Log:
X86: Implement the vectorcall calling convention

This is a Microsoft calling convention that supports both x86 and x86_64
subtargets. It passes vector and floating point arguments in XMM0-XMM5,
and passes them indirectly once they are consumed.

Homogenous vector aggregates of up to four elements can be passed in
sequential vector registers, but this part is not implemented in LLVM
and will be handled in Clang.

On 32-bit x86, it is similar to fastcall in that it uses ecx:edx as
integer register parameters and is callee cleanup. On x86_64, it
delegates to the normal win64 calling convention.

Reviewers: majnemer

Differential Revision: http://reviews.llvm.org/D5943

Added:
    llvm/trunk/test/CodeGen/X86/vectorcall.ll
Modified:
    llvm/trunk/include/llvm/IR/CallingConv.h
    llvm/trunk/lib/AsmParser/LLLexer.cpp
    llvm/trunk/lib/AsmParser/LLParser.cpp
    llvm/trunk/lib/AsmParser/LLToken.h
    llvm/trunk/lib/IR/AsmWriter.cpp
    llvm/trunk/lib/IR/Mangler.cpp
    llvm/trunk/lib/Target/X86/X86CallingConv.h
    llvm/trunk/lib/Target/X86/X86CallingConv.td

Modified: llvm/trunk/include/llvm/IR/CallingConv.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/CallingConv.h?rev=220745&r1=220744&r2=220745&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/CallingConv.h (original)
+++ llvm/trunk/include/llvm/IR/CallingConv.h Mon Oct 27 20:29:26 2014
@@ -140,7 +140,11 @@ namespace CallingConv {
     /// convention differs from the more common \c X86_64_SysV convention
     /// in a number of ways, most notably in that XMM registers used to pass
     /// arguments are shadowed by GPRs, and vice versa.
-    X86_64_Win64 = 79
+    X86_64_Win64 = 79,
+
+    /// \brief MSVC calling convention that passes vectors and vector aggregates
+    /// in SSE registers.
+    X86_VectorCall = 80
   };
 } // End CallingConv namespace
 

Modified: llvm/trunk/lib/AsmParser/LLLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLLexer.cpp?rev=220745&r1=220744&r2=220745&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLLexer.cpp (original)
+++ llvm/trunk/lib/AsmParser/LLLexer.cpp Mon Oct 27 20:29:26 2014
@@ -580,6 +580,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(x86_stdcallcc);
   KEYWORD(x86_fastcallcc);
   KEYWORD(x86_thiscallcc);
+  KEYWORD(x86_vectorcallcc);
   KEYWORD(arm_apcscc);
   KEYWORD(arm_aapcscc);
   KEYWORD(arm_aapcs_vfpcc);

Modified: llvm/trunk/lib/AsmParser/LLParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLParser.cpp?rev=220745&r1=220744&r2=220745&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLParser.cpp (original)
+++ llvm/trunk/lib/AsmParser/LLParser.cpp Mon Oct 27 20:29:26 2014
@@ -1448,6 +1448,7 @@ bool LLParser::ParseOptionalDLLStorageCl
 ///   ::= 'x86_stdcallcc'
 ///   ::= 'x86_fastcallcc'
 ///   ::= 'x86_thiscallcc'
+///   ::= 'x86_vectorcallcc'
 ///   ::= 'arm_apcscc'
 ///   ::= 'arm_aapcscc'
 ///   ::= 'arm_aapcs_vfpcc'
@@ -1473,6 +1474,7 @@ bool LLParser::ParseOptionalCallingConv(
   case lltok::kw_x86_stdcallcc:  CC = CallingConv::X86_StdCall; break;
   case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
   case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
+  case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break;
   case lltok::kw_arm_apcscc:     CC = CallingConv::ARM_APCS; break;
   case lltok::kw_arm_aapcscc:    CC = CallingConv::ARM_AAPCS; break;
   case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;

Modified: llvm/trunk/lib/AsmParser/LLToken.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLToken.h?rev=220745&r1=220744&r2=220745&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLToken.h (original)
+++ llvm/trunk/lib/AsmParser/LLToken.h Mon Oct 27 20:29:26 2014
@@ -87,7 +87,7 @@ namespace lltok {
 
     kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
     kw_intel_ocl_bicc,
-    kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
+    kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_x86_vectorcallcc,
     kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
     kw_msp430_intrcc,
     kw_ptx_kernel, kw_ptx_device,

Modified: llvm/trunk/lib/IR/AsmWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AsmWriter.cpp?rev=220745&r1=220744&r2=220745&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AsmWriter.cpp (original)
+++ llvm/trunk/lib/IR/AsmWriter.cpp Mon Oct 27 20:29:26 2014
@@ -285,6 +285,7 @@ static void PrintCallingConv(unsigned cc
   case CallingConv::X86_StdCall:   Out << "x86_stdcallcc"; break;
   case CallingConv::X86_FastCall:  Out << "x86_fastcallcc"; break;
   case CallingConv::X86_ThisCall:  Out << "x86_thiscallcc"; break;
+  case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
   case CallingConv::Intel_OCL_BI:  Out << "intel_ocl_bicc"; break;
   case CallingConv::ARM_APCS:      Out << "arm_apcscc"; break;
   case CallingConv::ARM_AAPCS:     Out << "arm_aapcscc"; break;

Modified: llvm/trunk/lib/IR/Mangler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Mangler.cpp?rev=220745&r1=220744&r2=220745&view=diff
==============================================================================
--- llvm/trunk/lib/IR/Mangler.cpp (original)
+++ llvm/trunk/lib/IR/Mangler.cpp Mon Oct 27 20:29:26 2014
@@ -22,7 +22,7 @@ using namespace llvm;
 
 static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
                               Mangler::ManglerPrefixTy PrefixTy,
-                              const DataLayout &DL, bool UseAt) {
+                              const DataLayout &DL, char Prefix) {
   SmallString<256> TmpData;
   StringRef Name = GVName.toStringRef(TmpData);
   assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
@@ -39,13 +39,8 @@ static void getNameWithPrefixx(raw_ostre
   else if (PrefixTy == Mangler::LinkerPrivate)
     OS << DL.getLinkerPrivateGlobalPrefix();
 
-  if (UseAt) {
-    OS << '@';
-  } else {
-    char Prefix = DL.getGlobalPrefix();
-    if (Prefix != '\0')
-      OS << Prefix;
-  }
+  if (Prefix != '\0')
+    OS << Prefix;
 
   // If this is a simple string that doesn't need escaping, just append it.
   OS << Name;
@@ -53,7 +48,8 @@ static void getNameWithPrefixx(raw_ostre
 
 void Mangler::getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
                                 ManglerPrefixTy PrefixTy) const {
-  return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, false);
+  char Prefix = DL->getGlobalPrefix();
+  return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, Prefix);
 }
 
 void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
@@ -63,11 +59,21 @@ void Mangler::getNameWithPrefix(SmallVec
   return getNameWithPrefix(OS, GVName, PrefixTy);
 }
 
-/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
-/// a suffix on their name indicating the number of words of arguments they
-/// take.
-static void AddFastCallStdCallSuffix(raw_ostream &OS, const Function *F,
-                                     const DataLayout &TD) {
+static bool hasByteCountSuffix(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::X86_FastCall:
+  case CallingConv::X86_StdCall:
+  case CallingConv::X86_VectorCall:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Microsoft fastcall and stdcall functions require a suffix on their name
+/// indicating the number of words of arguments they take.
+static void addByteCountSuffix(raw_ostream &OS, const Function *F,
+                               const DataLayout &TD) {
   // Calculate arguments size total.
   unsigned ArgWords = 0;
   for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
@@ -76,8 +82,9 @@ static void AddFastCallStdCallSuffix(raw
     // 'Dereference' type in case of byval or inalloca parameter attribute.
     if (AI->hasByValOrInAllocaAttr())
       Ty = cast<PointerType>(Ty)->getElementType();
-    // Size should be aligned to DWORD boundary
-    ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+    // Size should be aligned to pointer size.
+    unsigned PtrSize = TD.getPointerSize();
+    ArgWords += RoundUpToAlignment(TD.getTypeAllocSize(Ty), PtrSize);
   }
 
   OS << '@' << ArgWords;
@@ -106,34 +113,40 @@ void Mangler::getNameWithPrefix(raw_ostr
   }
 
   StringRef Name = GV->getName();
+  char Prefix = DL->getGlobalPrefix();
 
-  bool UseAt = false;
-  const Function *MSFunc = nullptr;
-  CallingConv::ID CC;
-  if (Name[0] != '\1' && DL->hasMicrosoftFastStdCallMangling()) {
-    if ((MSFunc = dyn_cast<Function>(GV))) {
-      CC = MSFunc->getCallingConv();
-      // fastcall functions need to start with @ instead of _.
-      if (CC == CallingConv::X86_FastCall)
-        UseAt = true;
-    }
+  // Mangle functions with Microsoft calling conventions specially.  Only do
+  // this mangling for x86_64 vectorcall and 32-bit x86.
+  const Function *MSFunc = dyn_cast<Function>(GV);
+  if (Name.startswith("\01"))
+    MSFunc = nullptr; // Don't mangle when \01 is present.
+  CallingConv::ID CC = MSFunc ? MSFunc->getCallingConv() : CallingConv::C;
+  if (!DL->hasMicrosoftFastStdCallMangling() &&
+      CC != CallingConv::X86_VectorCall)
+    MSFunc = nullptr;
+  if (MSFunc) {
+    if (CC == CallingConv::X86_FastCall)
+      Prefix = '@'; // fastcall functions have an @ prefix instead of _.
+    else if (CC == CallingConv::X86_VectorCall)
+      Prefix = '\0'; // vectorcall functions have no prefix.
   }
 
-  getNameWithPrefixx(OS, Name, PrefixTy, *DL, UseAt);
+  getNameWithPrefixx(OS, Name, PrefixTy, *DL, Prefix);
 
   if (!MSFunc)
     return;
 
-  // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
-  // add it.
-  // fastcall and stdcall functions usually need @42 at the end to specify
-  // the argument info.
+  // If we are supposed to add a microsoft-style suffix for stdcall, fastcall,
+  // or vectorcall, add it.  These functions have a suffix of @N where N is the
+  // cumulative byte size of all of the parameters to the function in decimal.
+  if (CC == CallingConv::X86_VectorCall)
+    OS << '@'; // vectorcall functions use a double @ suffix.
   FunctionType *FT = MSFunc->getFunctionType();
-  if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+  if (hasByteCountSuffix(CC) &&
       // "Pure" variadic functions do not receive @0 suffix.
       (!FT->isVarArg() || FT->getNumParams() == 0 ||
        (FT->getNumParams() == 1 && MSFunc->hasStructRetAttr())))
-    AddFastCallStdCallSuffix(OS, MSFunc, *DL);
+    addByteCountSuffix(OS, MSFunc, *DL);
 }
 
 void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.h?rev=220745&r1=220744&r2=220745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.h (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.h Mon Oct 27 20:29:26 2014
@@ -20,6 +20,19 @@
 
 namespace llvm {
 
+inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
+                                         MVT &LocVT,
+                                         CCValAssign::LocInfo &LocInfo,
+                                         ISD::ArgFlagsTy &ArgFlags,
+                                         CCState &State) {
+  // Similar to CCPassIndirect, with the addition of inreg.
+  LocVT = MVT::i32;
+  LocInfo = CCValAssign::Indirect;
+  ArgFlags.setInReg();
+  return false; // Continue the search, but now for i32.
+}
+
+
 inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
                                 CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
                                 CCState &) {

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=220745&r1=220744&r2=220745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td Mon Oct 27 20:29:26 2014
@@ -124,6 +124,24 @@ def RetCC_X86_32_HiPE : CallingConv<[
   CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
 ]>;
 
+// X86-32 HiPE return-value convention.
+def RetCC_X86_32_VectorCall : CallingConv<[
+  // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3.
+  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+            CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+  // 256-bit FP vectors
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+            CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+  // 512-bit FP vectors
+  CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+            CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
+
+  // Return integers in the standard way.
+  CCDelegateTo<RetCC_X86Common>
+]>;
+
 // X86-64 C return-value convention.
 def RetCC_X86_64_C : CallingConv<[
   // The X86-64 calling convention always returns FP values in XMM0.
@@ -179,6 +197,7 @@ def RetCC_X86_32 : CallingConv<[
   CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
   // If HiPE, use RetCC_X86_32_HiPE.
   CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+  CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
 
   // Otherwise, use RetCC_X86_32_C.
   CCDelegateTo<RetCC_X86_32_C>
@@ -330,6 +349,25 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[f80], CCAssignToStack<0, 0>>
 ]>;
 
+def CC_X86_Win64_VectorCall : CallingConv<[
+  // The first 6 floating point and vector types of 128 bits or less use
+  // XMM0-XMM5.
+  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+           CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+  // 256-bit vectors use YMM registers.
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+           CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+  // 512-bit vectors use ZMM registers.
+  CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+           CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+  // Delegate to fastcall to handle integer types.
+  CCDelegateTo<CC_X86_Win64_C>
+]>;
+
+
 def CC_X86_64_GHC : CallingConv<[
   // Promote i8/i16/i32 arguments to i64.
   CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -463,6 +501,30 @@ def CC_X86_32_FastCall : CallingConv<[
   CCDelegateTo<CC_X86_32_Common>
 ]>;
 
+def CC_X86_32_VectorCall : CallingConv<[
+  // The first 6 floating point and vector types of 128 bits or less use
+  // XMM0-XMM5.
+  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+           CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+  // 256-bit vectors use YMM registers.
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+           CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+  // 512-bit vectors use ZMM registers.
+  CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+           CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+  // Otherwise, pass it indirectly.
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
+            v32i8, v16i16, v8i32, v4i64, v8f32, v4f64,
+            v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+           CCCustom<"CC_X86_32_VectorCallIndirect">>,
+
+  // Delegate to fastcall to handle integer types.
+  CCDelegateTo<CC_X86_32_FastCall>
+]>;
+
 def CC_X86_32_ThisCall_Common : CallingConv<[
   // The first integer argument is passed in ECX
   CCIfType<[i32], CCAssignToReg<[ECX]>>,
@@ -576,6 +638,7 @@ def CC_Intel_OCL_BI : CallingConv<[
 // This is the root argument convention for the X86-32 backend.
 def CC_X86_32 : CallingConv<[
   CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+  CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
   CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
   CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
   CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
@@ -593,6 +656,7 @@ def CC_X86_64 : CallingConv<[
   CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
   CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
   CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
+  CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
 
   // Mingw64 and native Win64 use Win64 CC
   CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,

Added: llvm/trunk/test/CodeGen/X86/vectorcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vectorcall.ll?rev=220745&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vectorcall.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vectorcall.ll Mon Oct 27 20:29:26 2014
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=i686-pc-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+; RUN: llc -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64
+
+; Test integer arguments.
+
+define x86_vectorcallcc i32 @test_int_1() {
+  ret i32 0
+}
+
+; CHECK-LABEL: {{^}}test_int_1@@0:
+; CHECK: xorl %eax, %eax
+
+define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
+  ret i32 %a
+}
+
+; X86-LABEL: {{^}}test_int_2@@4:
+; X64-LABEL: {{^}}test_int_2@@8:
+; CHECK: movl %ecx, %eax
+
+define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) {
+  %at = trunc i64 %a to i32
+  ret i32 %at
+}
+
+; X86-LABEL: {{^}}test_int_3@@8:
+; X64-LABEL: {{^}}test_int_3@@8:
+; CHECK: movl %ecx, %eax
+
+define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) {
+  %s = add i32 %a, %b
+  ret i32 %s
+}
+
+; X86-LABEL: {{^}}test_int_4@@8:
+; X86: leal (%ecx,%edx), %eax
+
+; X64-LABEL: {{^}}test_int_4@@16:
+; X64: leal (%rcx,%rdx), %eax
+
+define x86_vectorcallcc i32 @"\01test_int_5"(i32, i32) {
+  ret i32 0
+}
+; CHECK-LABEL: {{^}}test_int_5:
+
+define x86_vectorcallcc double @test_fp_1(double %a, double %b) {
+  ret double %b
+}
+; CHECK-LABEL: {{^}}test_fp_1@@16:
+; CHECK: movaps %xmm1, %xmm0
+
+define x86_vectorcallcc double @test_fp_2(
+    double, double, double, double, double, double, double %r) {
+  ret double %r
+}
+; CHECK-LABEL: {{^}}test_fp_2@@56:
+; CHECK: movsd {{[0-9]+\(%[re]sp\)}}, %xmm0
+
+define x86_vectorcallcc {double, double, double, double} @test_fp_3() {
+  ret {double, double, double, double}
+        { double 0.0, double 0.0, double 0.0, double 0.0 }
+}
+; CHECK-LABEL: {{^}}test_fp_3@@0:
+; CHECK: xorps %xmm0
+; CHECK: xorps %xmm1
+; CHECK: xorps %xmm2
+; CHECK: xorps %xmm3
+
+; FIXME: Returning via x87 isn't compatible, but its hard to structure the
+; tablegen any other way.
+define x86_vectorcallcc {double, double, double, double, double} @test_fp_4() {
+  ret {double, double, double, double, double}
+        { double 0.0, double 0.0, double 0.0, double 0.0, double 0.0 }
+}
+; CHECK-LABEL: {{^}}test_fp_4@@0:
+; CHECK: fldz
+; CHECK: xorps %xmm0
+; CHECK: xorps %xmm1
+; CHECK: xorps %xmm2
+; CHECK: xorps %xmm3
+
+define x86_vectorcallcc <16 x i8> @test_vec_1(<16 x i8> %a, <16 x i8> %b) {
+  ret <16 x i8> %b
+}
+; CHECK-LABEL: {{^}}test_vec_1@@32:
+; CHECK: movaps %xmm1, %xmm0
+
+define x86_vectorcallcc <16 x i8> @test_vec_2(
+    double, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> %r) {
+  ret <16 x i8> %r
+}
+; CHECK-LABEL: {{^}}test_vec_2@@104:
+; CHECK: movaps (%{{[re]}}cx), %xmm0





More information about the llvm-commits mailing list