[llvm] 6ee497a - [X86][Regcall] Add an option to respect regcall ABI v.4 in win64&win32
Bing1 Yu via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 2 22:58:48 PDT 2023
Author: Bing1 Yu
Date: 2023-08-03T13:58:33+08:00
New Revision: 6ee497aa0b48ad892447f29a90b4e61241949295
URL: https://github.com/llvm/llvm-project/commit/6ee497aa0b48ad892447f29a90b4e61241949295
DIFF: https://github.com/llvm/llvm-project/commit/6ee497aa0b48ad892447f29a90b4e61241949295.diff
LOG: [X86][Regcall] Add an option to respect regcall ABI v.4 in win64&win32
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D155863
Added:
clang/test/CodeGen/check-regcall4-moduleflag.c
clang/test/CodeGen/regcall4.c
clang/test/CodeGenCXX/regcall4.cpp
llvm/test/CodeGen/X86/sse-regcall4.ll
Modified:
clang/include/clang/Basic/LangOptions.def
clang/include/clang/Driver/Options.td
clang/lib/AST/ItaniumMangle.cpp
clang/lib/AST/Mangle.cpp
clang/lib/AST/MicrosoftMangle.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/Driver/cl-cc-flags.c
llvm/lib/Target/X86/X86CallingConv.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 007b3737f83e62..b6bb5e969e130c 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -429,6 +429,8 @@ LANGOPT(PaddingOnUnsignedFixedPoint, 1, 0,
LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors")
+LANGOPT(RegCall4, 1, 0, "Set __regcall4 as a default calling convention to respect __regcall ABI v.4")
+
LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type")
ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2,
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9e25a5e0b58a58..296fa1fcc38a02 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4505,6 +4505,9 @@ def no_offload_add_rpath: Flag<["--"], "no-offload-add-rpath">, Flags<[NoArgumen
Alias<frtlib_add_rpath>;
def r : Flag<["-"], "r">, Flags<[LinkerInput,NoArgumentUnused]>,
Group<Link_Group>;
+def regcall4 : Flag<["-"], "regcall4">, Group<m_Group>, Flags<[CC1Option]>,
+ HelpText<"Set __regcall4 as a default calling convention to respect __regcall ABI v.4">,
+ MarshallingInfoFlag<LangOpts<"RegCall4">>;
def save_temps_EQ : Joined<["-", "--"], "save-temps=">, Flags<[CC1Option, FlangOption, FC1Option, NoXarchOption]>,
HelpText<"Save intermediate compilation results.">;
def save_temps : Flag<["-", "--"], "save-temps">, Flags<[FlangOption, FC1Option, NoXarchOption]>,
@@ -7292,6 +7295,8 @@ def _SLASH_Gv : CLFlag<"Gv">,
HelpText<"Set __vectorcall as a default calling convention">;
def _SLASH_Gregcall : CLFlag<"Gregcall">,
HelpText<"Set __regcall as a default calling convention">;
+def _SLASH_Gregcall4 : CLFlag<"Gregcall4">,
+ HelpText<"Set __regcall4 as a default calling convention to respect __regcall ABI v.4">;
// GNU Driver aliases
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 16f0d90451f7ad..153f6dc2e9cf12 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -1688,8 +1688,12 @@ void CXXNameMangler::mangleRegCallName(const IdentifierInfo *II) {
// <source-name> ::= <positive length number> __regcall3__ <identifier>
// <number> ::= [n] <non-negative decimal integer>
// <identifier> ::= <unqualified source code identifier>
- Out << II->getLength() + sizeof("__regcall3__") - 1 << "__regcall3__"
- << II->getName();
+ if (getASTContext().getLangOpts().RegCall4)
+ Out << II->getLength() + sizeof("__regcall4__") - 1 << "__regcall4__"
+ << II->getName();
+ else
+ Out << II->getLength() + sizeof("__regcall3__") - 1 << "__regcall3__"
+ << II->getName();
}
void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 31cdad4c8fdd4e..53af9fc4d51897 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -198,8 +198,12 @@ void MangleContext::mangleName(GlobalDecl GD, raw_ostream &Out) {
Out << '_';
else if (CC == CCM_Fast)
Out << '@';
- else if (CC == CCM_RegCall)
- Out << "__regcall3__";
+ else if (CC == CCM_RegCall) {
+ if (getASTContext().getLangOpts().RegCall4)
+ Out << "__regcall4__";
+ else
+ Out << "__regcall3__";
+ }
if (!MCXX)
Out << D->getIdentifier()->getName();
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 3306d90dc85664..91af18d6119796 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -2853,6 +2853,7 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) {
// ::= T # __attribute__((__swiftasynccall__))
// // Clang-only
// ::= w # __regcall
+ // ::= x # __regcall4
// The 'export' calling conventions are from a bygone era
// (*cough*Win16*cough*) when functions were declared for export with
// that keyword. (It didn't actually export them, it just made them so
@@ -2873,7 +2874,12 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) {
case CC_Swift: Out << 'S'; break;
case CC_SwiftAsync: Out << 'W'; break;
case CC_PreserveMost: Out << 'U'; break;
- case CC_X86RegCall: Out << 'w'; break;
+ case CC_X86RegCall:
+ if (getASTContext().getLangOpts().RegCall4)
+ Out << "x";
+ else
+ Out << "w";
+ break;
}
}
void MicrosoftCXXNameMangler::mangleCallingConvention(const FunctionType *T) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8d2abc69c330e7..e7cbc748b7a38b 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1196,6 +1196,8 @@ void CodeGenModule::Release() {
getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment);
if (getCodeGenOpts().SkipRaxSetup)
getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1);
+ if (getLangOpts().RegCall4)
+ getModule().addModuleFlag(llvm::Module::Override, "RegCallv4", 1);
if (getContext().getTargetInfo().getMaxTLSAlign())
getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign",
@@ -1707,7 +1709,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
if (FD &&
FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
- Out << "__regcall3__" << II->getName();
+ if (CGM.getLangOpts().RegCall4)
+ Out << "__regcall4__" << II->getName();
+ else
+ Out << "__regcall3__" << II->getName();
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
Out << "__device_stub__" << II->getName();
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index eae3643bd4bf59..c5155735000490 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7936,6 +7936,9 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
CmdArgs.push_back("-fms-memptr-rep=virtual");
}
+ if (Args.hasArg(options::OPT_regcall4))
+ CmdArgs.push_back("-regcall4");
+
// Parse the default calling convention options.
if (Arg *CCArg =
Args.getLastArg(options::OPT__SLASH_Gd, options::OPT__SLASH_Gr,
@@ -7972,6 +7975,9 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
CmdArgs.push_back(DCCFlag);
}
+ if (Args.hasArg(options::OPT__SLASH_Gregcall4))
+ CmdArgs.push_back("-regcall4");
+
Args.AddLastArg(CmdArgs, options::OPT_vtordisp_mode_EQ);
if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) {
diff --git a/clang/test/CodeGen/check-regcall4-moduleflag.c b/clang/test/CodeGen/check-regcall4-moduleflag.c
new file mode 100644
index 00000000000000..0b968e3d19d827
--- /dev/null
+++ b/clang/test/CodeGen/check-regcall4-moduleflag.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=NO-REGCALL4
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -regcall4 -emit-llvm %s -o - | FileCheck %s -check-prefix=REGCALL4
+
+void f(void) {}
+
+// REGCALL4: !"RegCallv4", i32 1}
+// NO-REGCALL4-NOT: "RegCallv4"
diff --git a/clang/test/CodeGen/regcall4.c b/clang/test/CodeGen/regcall4.c
new file mode 100644
index 00000000000000..5fbe77fbc7d769
--- /dev/null
+++ b/clang/test/CodeGen/regcall4.c
@@ -0,0 +1,100 @@
+// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefixes=X86,Win32
+// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefixes=X64,Win64
+// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-linux-gnu | FileCheck %s --check-prefixes=X86,Lin32
+// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-linux-gnu | FileCheck %s --check-prefixes=X64,Lin64
+
+#include <xmmintrin.h>
+
+void __regcall v1(int a, int b) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__v1(i32 inreg noundef %a, i32 inreg noundef %b)
+// X64: define dso_local x86_regcallcc void @__regcall4__v1(i32 noundef %a, i32 noundef %b)
+
+void __attribute__((regcall)) v1b(int a, int b) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__v1b(i32 inreg noundef %a, i32 inreg noundef %b)
+// X64: define dso_local x86_regcallcc void @__regcall4__v1b(i32 noundef %a, i32 noundef %b)
+
+void __regcall v2(char a, char b) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__v2(i8 inreg noundef signext %a, i8 inreg noundef signext %b)
+// Win64: define dso_local x86_regcallcc void @__regcall4__v2(i8 noundef %a, i8 noundef %b)
+// Lin64: define dso_local x86_regcallcc void @__regcall4__v2(i8 noundef signext %a, i8 noundef signext %b)
+
+struct Small { int x; };
+void __regcall v3(int a, struct Small b, int c) {}
+// Win32: define dso_local x86_regcallcc void @__regcall4__v3(i32 inreg noundef %a, i32 %b.0, i32 inreg noundef %c)
+// Lin32: define dso_local x86_regcallcc void @__regcall4__v3(i32 inreg noundef %a, i32 inreg %0, i32 %b.0, i32 inreg noundef %c)
+// X64: define dso_local x86_regcallcc void @__regcall4__v3(i32 noundef %a, i32 %b.coerce, i32 noundef %c)
+
+struct Large { int a[5]; };
+void __regcall v4(int a, struct Large b, int c) {}
+// Win32: define dso_local x86_regcallcc void @__regcall4__v4(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 inreg noundef %c)
+// Lin32: define dso_local x86_regcallcc void @__regcall4__v4(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 noundef %c)
+// Win64: define dso_local x86_regcallcc void @__regcall4__v4(i32 noundef %a, ptr noundef %b, i32 noundef %c)
+// Lin64: define dso_local x86_regcallcc void @__regcall4__v4(i32 noundef %a, [5 x i32] %b.coerce, i32 noundef %c)
+
+void __regcall v5(long long a, int b, int c) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__v5(i64 noundef %a, i32 inreg noundef %b, i32 inreg noundef %c)
+// X64: define dso_local x86_regcallcc void @__regcall4__v5(i64 noundef %a, i32 noundef %b, i32 noundef %c)
+
+struct HFA2 { double x, y; };
+struct HFA4 { double w, x, y, z; };
+struct HFA5 { double v, w, x, y, z; };
+
+void __regcall hfa1(int a, struct HFA4 b, int c) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__hfa1(i32 inreg noundef %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg noundef %c)
+// X64: define dso_local x86_regcallcc void @__regcall4__hfa1(i32 noundef %a, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, i32 noundef %c)
+
+// HFAs that would require more than six total SSE registers are passed
+// indirectly. Additional vector arguments can consume the rest of the SSE
+// registers.
+void __regcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, ptr inreg noundef %0)
+// X64: define dso_local x86_regcallcc void @__regcall4__hfa2(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double noundef %c)
+
+// Ensure that we pass builtin types directly while counting them against the
+// SSE register usage.
+void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__hfa3(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double %f.0, double %f.1)
+// X64: define dso_local x86_regcallcc void @__regcall4__hfa3(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double %{{.*}}, double %{{.*}})
+
+// Aggregates with more than four elements are not HFAs and are passed byval.
+// Because they are not classified as homogeneous, they don't get special
+// handling to ensure alignment.
+void __regcall hfa4(struct HFA5 a) {}
+// X32: define dso_local x86_regcallcc void @__regcall4__hfa4(ptr noundef byval(%struct.HFA5) align 4 %{{.*}})
+// Win64: define dso_local x86_regcallcc void @__regcall4__hfa4(ptr noundef %a)
+// Lin64: define dso_local x86_regcallcc void @__regcall4__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4)
+
+// Return HFAs of 4 or fewer elements in registers.
+static struct HFA2 g_hfa2;
+struct HFA2 __regcall hfa5(void) { return g_hfa2; }
+// X86: define dso_local x86_regcallcc %struct.HFA2 @__regcall4__hfa5()
+// X64: define dso_local x86_regcallcc %struct.HFA2 @__regcall4__hfa5()
+
+typedef float __attribute__((vector_size(16))) v4f32;
+struct HVA2 { v4f32 x, y; };
+struct HVA4 { v4f32 w, x, y, z; };
+
+void __regcall hva1(int a, struct HVA4 b, int c) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__hva1(i32 inreg noundef %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg noundef %c)
+// X64: define dso_local x86_regcallcc void @__regcall4__hva1(i32 noundef %a, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 noundef %c)
+
+void __regcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, ptr inreg noundef %0)
+// X64: define dso_local x86_regcallcc void @__regcall4__hva2(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> noundef %c)
+
+void __regcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__hva3(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, <4 x float> noundef %e, <4 x float> %f.0, <4 x float> %f.1)
+// X64: define dso_local x86_regcallcc void @__regcall4__hva3(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, <4 x float> noundef %e, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+
+typedef float __attribute__((ext_vector_type(3))) v3f32;
+struct OddSizeHVA { v3f32 x, y; };
+
+void __regcall odd_size_hva(struct OddSizeHVA a) {}
+// X86: define dso_local x86_regcallcc void @__regcall4__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1)
+// X64: define dso_local x86_regcallcc void @__regcall4__odd_size_hva(<3 x float> %{{.*}}, <3 x float> %{{.*}})
+
+struct HFA6 { __m128 f[4]; };
+struct HFA6 __regcall ret_reg_reused(struct HFA6 a, struct HFA6 b, struct HFA6 c, struct HFA6 d){ struct HFA6 h; return h;}
+// X86: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, ptr inreg noundef %c, ptr inreg noundef %d)
+// Win64: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c.0, <4 x float> %c.1, <4 x float> %c.2, <4 x float> %c.3, <4 x float> %d.0, <4 x float> %d.1, <4 x float> %d.2, <4 x float> %d.3)
+// Lin64: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused([4 x <4 x float>] %a.coerce, [4 x <4 x float>] %b.coerce, [4 x <4 x float>] %c.coerce, [4 x <4 x float>] %d.coerce)
diff --git a/clang/test/CodeGenCXX/regcall4.cpp b/clang/test/CodeGenCXX/regcall4.cpp
new file mode 100644
index 00000000000000..7c35db36e1053d
--- /dev/null
+++ b/clang/test/CodeGenCXX/regcall4.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -regcall4 -triple x86_64-linux-gnu -emit-llvm -std=c++11 %s -o - | FileCheck -allow-deprecated-dag-overlap -check-prefix=CHECK-LIN -check-prefix=CHECK-LIN64 %s
+// RUN: %clang_cc1 -regcall4 -triple i386-linux-gnu -emit-llvm -std=c++11 %s -o - | FileCheck -allow-deprecated-dag-overlap -check-prefix=CHECK-LIN -check-prefix=CHECK-LIN32 %s
+// RUN: %clang_cc1 -regcall4 -triple x86_64-windows-msvc -emit-llvm -std=c++11 %s -o - -DWIN_TEST | FileCheck -allow-deprecated-dag-overlap -check-prefix=CHECK-WIN64 %s
+// RUN: %clang_cc1 -regcall4 -triple i386-windows-msvc -emit-llvm -std=c++11 %s -o - -DWIN_TEST | FileCheck -allow-deprecated-dag-overlap -check-prefix=CHECK-WIN32 %s
+
+int __regcall foo(int i);
+
+int main()
+{
+ int p = 0, _data;
+ auto lambda = [&](int parameter) -> int {
+ _data = foo(parameter);
+ return _data;
+ };
+ return lambda(p);
+}
+// CHECK-LIN: call x86_regcallcc {{.+}} @_Z15__regcall4__foo
+// CHECK-WIN64: call x86_regcallcc {{.+}} @"?foo@@YxHH@Z"
+// CHECK-WIN32: call x86_regcallcc {{.+}} @"?foo@@YxHH@Z"
+
+int __regcall foo (int i){
+ return i;
+}
+// CHECK-LIN: define{{.*}} x86_regcallcc noundef {{.+}}@_Z15__regcall4__foo
+// CHECK-WIN64: define dso_local x86_regcallcc noundef {{.+}}@"?foo@@YxHH@Z"
+// CHECK-WIN32: define dso_local x86_regcallcc noundef {{.+}}@"?foo@@YxHH@Z"
+
+// used to give a body to test_class functions
+static int x = 0;
+class test_class {
+ int a;
+public:
+#ifndef WIN_TEST
+ __regcall
+#endif
+ test_class(){++x;}
+ // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classC1Ev
+ // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classC2Ev
+ // Windows ignores calling convention on constructor/destructors.
+ // CHECK-WIN64-DAG: define linkonce_odr dso_local noundef ptr @"??0test_class@@QEAA@XZ"
+ // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_thiscallcc noundef ptr @"??0test_class@@QAE@XZ"
+
+#ifndef WIN_TEST
+ __regcall
+#endif
+ ~test_class(){--x;}
+ // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classD2Ev
+ // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classD1Ev
+ // Windows ignores calling convention on constructor/destructors.
+ // CHECK-WIN64-DAG: define linkonce_odr dso_local void @"??1test_class@@QEAA@XZ"
+ // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"??1test_class@@QAE@XZ"
+
+ test_class& __regcall operator+=(const test_class&){
+ return *this;
+ }
+ // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc noundef nonnull align 4 dereferenceable(4) ptr @_ZN10test_classpLERKS_
+ // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc noundef nonnull align 4 dereferenceable(4) ptr @"??Ytest_class@@QEAxAEAV0@AEBV0@@Z"
+ // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc noundef nonnull align 4 dereferenceable(4) ptr @"??Ytest_class@@QAxAAV0@ABV0@@Z"
+ void __regcall do_thing(){}
+ // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_class20__regcall4__do_thingEv
+ // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"?do_thing@test_class@@QEAxXXZ"
+ // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"?do_thing@test_class@@QAxXXZ"
+
+ template<typename T>
+ void __regcall tempFunc(T i){}
+ // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_class20__regcall4__tempFuncIiEEvT_
+ // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"??$freeTempFunc@H@@YxXH@Z"
+ // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"??$freeTempFunc@H@@YxXH@Z"
+};
+
+bool __regcall operator ==(const test_class&, const test_class&){ --x; return false;}
+// CHECK-LIN-DAG: define{{.*}} x86_regcallcc noundef zeroext i1 @_ZeqRK10test_classS1_
+// CHECK-WIN64-DAG: define dso_local x86_regcallcc noundef zeroext i1 @"??8@Yx_NAEBVtest_class@@0@Z"
+// CHECK-WIN32-DAG: define dso_local x86_regcallcc noundef zeroext i1 @"??8@Yx_NABVtest_class@@0@Z"
+
+test_class __regcall operator""_test_class (unsigned long long) { ++x; return test_class{};}
+// CHECK-LIN64-DAG: define{{.*}} x86_regcallcc void @_Zli11_test_classy(ptr noalias sret(%class.test_class) align 4 %agg.result, i64 noundef %0)
+// CHECK-LIN32-DAG: define{{.*}} x86_regcallcc void @_Zli11_test_classy(ptr inreg noalias sret(%class.test_class) align 4 %agg.result, i64 noundef %0)
+// CHECK-WIN64-DAG: ??__K_test_class@@Yx?AVtest_class@@_K@Z"
+// CHECK-WIN32-DAG: ??__K_test_class@@Yx?AVtest_class@@_K@Z"
+
+template<typename T>
+void __regcall freeTempFunc(T i){}
+// CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_Z24__regcall4__freeTempFuncIiEvT_
+// CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"??$freeTempFunc@H@@YxXH@Z"
+// CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"??$freeTempFunc@H@@YxXH@Z"
+
+// class to force generation of functions
+void force_gen() {
+ test_class t;
+ test_class t2 = 12_test_class;
+ t += t2;
+ auto t3 = 100_test_class;
+ t3.tempFunc(1);
+ freeTempFunc(1);
+ t3.do_thing();
+}
+
+long double _Complex __regcall foo(long double _Complex f) {
+ return f;
+}
+// CHECK-LIN64-DAG: define{{.*}} x86_regcallcc void @_Z15__regcall4__fooCe(ptr noalias sret({ x86_fp80, x86_fp80 }) align 16 %agg.result, ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 %f)
+// CHECK-LIN32-DAG: define{{.*}} x86_regcallcc void @_Z15__regcall4__fooCe(ptr inreg noalias sret({ x86_fp80, x86_fp80 }) align 4 %agg.result, ptr noundef byval({ x86_fp80, x86_fp80 }) align 4 %f)
+// CHECK-WIN64-DAG: define dso_local x86_regcallcc noundef { double, double } @"?foo@@YxU?$_Complex@O@__clang@@U12@@Z"(double noundef %f.0, double noundef %f.1)
+// CHECK-WIN32-DAG: define dso_local x86_regcallcc noundef { double, double } @"?foo@@YxU?$_Complex@O@__clang@@U12@@Z"(double noundef %f.0, double noundef %f.1)
+
+// The following caused us to dereference uninitialized memory. The long name
+// seems necessary, as does the return types.
+float _Complex __regcall callee(float _Complex f);
+// CHECK-LIN64-DAG: declare x86_regcallcc noundef <2 x float> @_Z18__regcall4__calleeCf(<2 x float> noundef)
+// CHECK-LIN32-DAG: declare x86_regcallcc noundef { float, float } @_Z18__regcall4__calleeCf(float noundef, float noundef)
+// CHECK-WIN64-DAG: declare dso_local x86_regcallcc noundef { float, float } @"?callee@@YxU?$_Complex@M@__clang@@U12@@Z"(float noundef, float noundef)
+// CHECK-WIN32-DAG: declare dso_local x86_regcallcc noundef { float, float } @"?callee@@YxU?$_Complex@M@__clang@@U12@@Z"(float noundef, float noundef)
+
+__regcall int
+some_really_long_name_that_manages_to_hit_the_right_spot_of_mem(int a) {
+ float _Complex x[2];
+ x[0] = callee(x[0]);
+ return a;
+}
diff --git a/clang/test/Driver/cl-cc-flags.c b/clang/test/Driver/cl-cc-flags.c
index 6fa0b6bd8e92f6..eacaee2c276978 100644
--- a/clang/test/Driver/cl-cc-flags.c
+++ b/clang/test/Driver/cl-cc-flags.c
@@ -16,6 +16,10 @@
// RUN: %clang_cl --target=i686-windows-msvc /Gregcall -### -- %s 2>&1 | FileCheck --check-prefix=REGCALL %s
// REGCALL: -fdefault-calling-conv=regcall
+// RUN: %clang_cl --target=i686-windows-msvc /Gregcall /Gregcall4 -### -- %s 2>&1 | FileCheck --check-prefix=REGCALL4 %s
+// REGCALL4: -fdefault-calling-conv=regcall
+// REGCALL4: -regcall4
+
// Last one should win:
// RUN: %clang_cl --target=i686-windows-msvc /Gd /Gv -### -- %s 2>&1 | FileCheck --check-prefix=LASTWINS_VECTOR %s
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 06cebdc2159438..3ce59dc4aa61bd 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -23,6 +23,11 @@ class CCIfNotSubtarget<string F, CCAction A>
"(State.getMachineFunction().getSubtarget()).", F),
A>;
+/// CCIfRegCallv4 - Match if RegCall ABIv4 is respected.
+class CCIfRegCallv4<CCAction A>
+ : CCIf<"State.getMachineFunction().getFunction().getParent()->getModuleFlag(\"RegCallv4\")!=nullptr",
+ A>;
+
/// CCIfIsVarArgOnWin - Match if isVarArg on Windows 32bits.
class CCIfIsVarArgOnWin<CCAction A>
: CCIf<"State.isVarArg() && "
@@ -55,6 +60,20 @@ def RC_X86_32_RegCall : RC_X86_RegCall {
let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7];
}
+// RegCall register classes for 32 bits when respecting regcall ABI v.4.
+// Change in __regcall ABI v.4: don't use EAX, as a spare register is
+// needed to code the virtual call thunk.
+def RC_X86_32_RegCallv4_Win : RC_X86_RegCall {
+ let GPR_8 = [CL, DL, DIL, SIL];
+ let GPR_16 = [CX, DX, DI, SI];
+ let GPR_32 = [ECX, EDX, EDI, ESI];
+ let GPR_64 = [RAX]; ///< Not actually used, but AssignToReg can't handle []
+ ///< \todo Fix AssignToReg to enable empty lists
+ let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
+ let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7];
+ let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7];
+}
+
class RC_X86_64_RegCall : RC_X86_RegCall {
let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15];
@@ -71,6 +90,18 @@ def RC_X86_64_RegCall_Win : RC_X86_64_RegCall {
let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11, R12, R14, R15];
}
+// On Windows 64 we don't want to use R13 - it is reserved for
+// largely aligned stack.
+// Change in __regcall ABI v.4: additionally don't use R10, as
+// a spare register is needed to code the virtual call thunk.
+//
+def RC_X86_64_RegCallv4_Win : RC_X86_64_RegCall {
+ let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R11B, R12B, R14B, R15B];
+ let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R11W, R12W, R14W, R15W];
+ let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R11D, R12D, R14D, R15D];
+ let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R11, R12, R14, R15];
+}
+
def RC_X86_64_RegCall_SysV : RC_X86_64_RegCall {
let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R12B, R13B, R14B, R15B];
let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R12W, R13W, R14W, R15W];
@@ -433,8 +464,12 @@ def RetCC_X86_64_AnyReg : CallingConv<[
defm X86_32_RegCall :
X86_RegCall_base<RC_X86_32_RegCall>;
+defm X86_32_RegCallv4_Win :
+ X86_RegCall_base<RC_X86_32_RegCallv4_Win>;
defm X86_Win64_RegCall :
X86_RegCall_base<RC_X86_64_RegCall_Win>;
+defm X86_Win64_RegCallv4 :
+ X86_RegCall_base<RC_X86_64_RegCallv4_Win>;
defm X86_SysV64_RegCall :
X86_RegCall_base<RC_X86_64_RegCall_SysV>;
@@ -447,6 +482,8 @@ def RetCC_X86_32 : CallingConv<[
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin32()", CCIfRegCallv4<CCDelegateTo<RetCC_X86_32_RegCallv4_Win>>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_32_RegCall>>,
// Otherwise, use RetCC_X86_32_C.
@@ -473,6 +510,9 @@ def RetCC_X86_64 : CallingConv<[
// Handle Vectorcall CC
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_64_Vectorcall>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin64()", CCIfRegCallv4<CCDelegateTo<RetCC_X86_Win64_RegCallv4>>>>,
+
CCIfCC<"CallingConv::X86_RegCall",
CCIfSubtarget<"isTargetWin64()",
CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
@@ -1052,6 +1092,8 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::Tail", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin32()", CCIfRegCallv4<CCDelegateTo<CC_X86_32_RegCallv4_Win>>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
// Otherwise, drop to normal X86-32 CC
@@ -1067,6 +1109,8 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin64()", CCIfRegCallv4<CCDelegateTo<CC_X86_Win64_RegCallv4>>>>,
CCIfCC<"CallingConv::X86_RegCall",
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
diff --git a/llvm/test/CodeGen/X86/sse-regcall4.ll b/llvm/test/CodeGen/X86/sse-regcall4.ll
new file mode 100644
index 00000000000000..80eaf0f9000668
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-regcall4.ll
@@ -0,0 +1,467 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s
+
+; Test regcall when receiving/returning i1
+define x86_regcallcc i1 @test_argReti1(i1 %a) {
+; WIN32-LABEL: test_argReti1:
+; WIN32: # %bb.0:
+; WIN32-NEXT: incb %cl
+; WIN32-NEXT: # kill: def $cl killed $cl killed $ecx
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: test_argReti1:
+; WIN64: # %bb.0:
+; WIN64-NEXT: incb %al
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq
+;
+; LINUXOSX-LABEL: test_argReti1:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: incb %al
+; LINUXOSX-NEXT: # kill: def $al killed $al killed $eax
+; LINUXOSX-NEXT: retq
+ %add = add i1 %a, 1
+ ret i1 %add
+}
+
+; Test regcall when passing/retrieving i1
+define x86_regcallcc i1 @test_CallargReti1(i1 %a) {
+; WIN32-LABEL: test_CallargReti1:
+; WIN32: # %bb.0:
+; WIN32-NEXT: incb %cl
+; WIN32-NEXT: movzbl %cl, %ecx
+; WIN32-NEXT: calll _test_argReti1
+; WIN32-NEXT: incb %cl
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: test_CallargReti1:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rax
+; WIN64-NEXT: .seh_stackalloc 8
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: incb %al
+; WIN64-NEXT: movzbl %al, %eax
+; WIN64-NEXT: callq test_argReti1
+; WIN64-NEXT: incb %al
+; WIN64-NEXT: popq %rcx
+; WIN64-NEXT: retq
+; WIN64-NEXT: .seh_endproc
+;
+; LINUXOSX-LABEL: test_CallargReti1:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: pushq %rax
+; LINUXOSX-NEXT: .cfi_def_cfa_offset 16
+; LINUXOSX-NEXT: incb %al
+; LINUXOSX-NEXT: movzbl %al, %eax
+; LINUXOSX-NEXT: callq *test_argReti1@GOTPCREL(%rip)
+; LINUXOSX-NEXT: incb %al
+; LINUXOSX-NEXT: popq %rcx
+; LINUXOSX-NEXT: .cfi_def_cfa_offset 8
+; LINUXOSX-NEXT: retq
+ %b = add i1 %a, 1
+ %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
+ %d = add i1 %c, 1
+ ret i1 %d
+}
+
+;test calling conventions - input parameters, callee saved xmms
+define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind {
+; WIN32-LABEL: testf32_inp:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-16, %esp
+; WIN32-NEXT: subl $32, %esp
+; WIN32-NEXT: movaps %xmm7, (%esp) # 16-byte Spill
+; WIN32-NEXT: movaps %xmm6, %xmm7
+; WIN32-NEXT: movaps %xmm5, %xmm6
+; WIN32-NEXT: movaps %xmm3, %xmm5
+; WIN32-NEXT: movaps %xmm2, %xmm3
+; WIN32-NEXT: movaps %xmm1, %xmm2
+; WIN32-NEXT: movaps %xmm0, %xmm1
+; WIN32-NEXT: addps %xmm4, %xmm0
+; WIN32-NEXT: mulps %xmm4, %xmm1
+; WIN32-NEXT: subps %xmm1, %xmm0
+; WIN32-NEXT: movups 8(%ebp), %xmm1
+; WIN32-NEXT: addps %xmm1, %xmm0
+; WIN32-NEXT: movaps %xmm2, %xmm4
+; WIN32-NEXT: addps %xmm6, %xmm4
+; WIN32-NEXT: mulps %xmm6, %xmm2
+; WIN32-NEXT: subps %xmm2, %xmm4
+; WIN32-NEXT: movups 24(%ebp), %xmm1
+; WIN32-NEXT: addps %xmm1, %xmm4
+; WIN32-NEXT: movaps %xmm3, %xmm2
+; WIN32-NEXT: addps %xmm7, %xmm2
+; WIN32-NEXT: mulps %xmm7, %xmm3
+; WIN32-NEXT: subps %xmm3, %xmm2
+; WIN32-NEXT: movups 40(%ebp), %xmm1
+; WIN32-NEXT: addps %xmm1, %xmm2
+; WIN32-NEXT: movaps %xmm5, %xmm3
+; WIN32-NEXT: movaps (%esp), %xmm1 # 16-byte Reload
+; WIN32-NEXT: addps %xmm1, %xmm3
+; WIN32-NEXT: mulps %xmm1, %xmm5
+; WIN32-NEXT: subps %xmm5, %xmm3
+; WIN32-NEXT: movups 56(%ebp), %xmm1
+; WIN32-NEXT: addps %xmm1, %xmm3
+; WIN32-NEXT: movaps %xmm4, %xmm1
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testf32_inp:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $72, %rsp
+; WIN64-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm12, (%rsp) # 16-byte Spill
+; WIN64-NEXT: movaps %xmm0, %xmm12
+; WIN64-NEXT: addps %xmm4, %xmm12
+; WIN64-NEXT: movaps %xmm1, %xmm13
+; WIN64-NEXT: addps %xmm5, %xmm13
+; WIN64-NEXT: movaps %xmm2, %xmm14
+; WIN64-NEXT: addps %xmm6, %xmm14
+; WIN64-NEXT: movaps %xmm3, %xmm15
+; WIN64-NEXT: addps %xmm7, %xmm15
+; WIN64-NEXT: mulps %xmm4, %xmm0
+; WIN64-NEXT: subps %xmm0, %xmm12
+; WIN64-NEXT: mulps %xmm5, %xmm1
+; WIN64-NEXT: subps %xmm1, %xmm13
+; WIN64-NEXT: mulps %xmm6, %xmm2
+; WIN64-NEXT: subps %xmm2, %xmm14
+; WIN64-NEXT: mulps %xmm7, %xmm3
+; WIN64-NEXT: subps %xmm3, %xmm15
+; WIN64-NEXT: addps %xmm8, %xmm12
+; WIN64-NEXT: addps %xmm9, %xmm13
+; WIN64-NEXT: addps %xmm10, %xmm14
+; WIN64-NEXT: addps %xmm11, %xmm15
+; WIN64-NEXT: movaps %xmm12, %xmm0
+; WIN64-NEXT: movaps %xmm13, %xmm1
+; WIN64-NEXT: movaps %xmm14, %xmm2
+; WIN64-NEXT: movaps %xmm15, %xmm3
+; WIN64-NEXT: movaps (%rsp), %xmm12 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
+; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: retq
+;
+; LINUXOSX-LABEL: testf32_inp:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX-NEXT: movaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX-NEXT: movaps %xmm0, %xmm12
+; LINUXOSX-NEXT: addps %xmm4, %xmm12
+; LINUXOSX-NEXT: movaps %xmm1, %xmm13
+; LINUXOSX-NEXT: addps %xmm5, %xmm13
+; LINUXOSX-NEXT: movaps %xmm2, %xmm14
+; LINUXOSX-NEXT: addps %xmm6, %xmm14
+; LINUXOSX-NEXT: movaps %xmm3, %xmm15
+; LINUXOSX-NEXT: addps %xmm7, %xmm15
+; LINUXOSX-NEXT: mulps %xmm4, %xmm0
+; LINUXOSX-NEXT: subps %xmm0, %xmm12
+; LINUXOSX-NEXT: mulps %xmm5, %xmm1
+; LINUXOSX-NEXT: subps %xmm1, %xmm13
+; LINUXOSX-NEXT: mulps %xmm6, %xmm2
+; LINUXOSX-NEXT: subps %xmm2, %xmm14
+; LINUXOSX-NEXT: mulps %xmm7, %xmm3
+; LINUXOSX-NEXT: subps %xmm3, %xmm15
+; LINUXOSX-NEXT: addps %xmm8, %xmm12
+; LINUXOSX-NEXT: addps %xmm9, %xmm13
+; LINUXOSX-NEXT: addps %xmm10, %xmm14
+; LINUXOSX-NEXT: addps %xmm11, %xmm15
+; LINUXOSX-NEXT: movaps %xmm12, %xmm0
+; LINUXOSX-NEXT: movaps %xmm13, %xmm1
+; LINUXOSX-NEXT: movaps %xmm14, %xmm2
+; LINUXOSX-NEXT: movaps %xmm15, %xmm3
+; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
+; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
+; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
+; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
+; LINUXOSX-NEXT: retq
+ %x1 = fadd <16 x float> %a, %b
+ %x2 = fmul <16 x float> %a, %b
+ %x3 = fsub <16 x float> %x1, %x2
+ %x4 = fadd <16 x float> %x3, %c
+ ret <16 x float> %x4
+}
+
+;test calling conventions - input parameters, callee saved GPRs
+define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,
+; WIN32-LABEL: testi32_inp:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: pushl %ebx
+; WIN32-NEXT: subl $8, %esp
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; WIN32-NEXT: movl %ecx, %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: leal (%esi,%eax), %ecx
+; WIN32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl %esi, %ecx
+; WIN32-NEXT: subl %eax, %ecx
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: subl %edx, %eax
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: imull %eax, %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: imull %ecx, %eax
+; WIN32-NEXT: addl %ebx, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: movl %ebp, %ebx
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: imull %ebx, %ecx
+; WIN32-NEXT: addl %eax, %ecx
+; WIN32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: imull %eax, %edi
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; WIN32-NEXT: addl %esi, %edi
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: imull %ebp, %edx
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: addl %ecx, %edi
+; WIN32-NEXT: movl %edi, %ecx
+; WIN32-NEXT: addl $8, %esp
+; WIN32-NEXT: popl %ebx
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testi32_inp:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rbx
+; WIN64-NEXT: # kill: def $edx killed $edx def $rdx
+; WIN64-NEXT: # kill: def $esi killed $esi def $rsi
+; WIN64-NEXT: # kill: def $r14d killed $r14d def $r14
+; WIN64-NEXT: # kill: def $r12d killed $r12d def $r12
+; WIN64-NEXT: # kill: def $r11d killed $r11d def $r11
+; WIN64-NEXT: # kill: def $r9d killed $r9d def $r9
+; WIN64-NEXT: # kill: def $r8d killed $r8d def $r8
+; WIN64-NEXT: # kill: def $edi killed $edi def $rdi
+; WIN64-NEXT: leal (%rdx,%rdi), %ebx
+; WIN64-NEXT: # kill: def $edx killed $edx killed $rdx
+; WIN64-NEXT: subl %edi, %edx
+; WIN64-NEXT: leal (%rsi,%r8), %edi
+; WIN64-NEXT: # kill: def $esi killed $esi killed $rsi
+; WIN64-NEXT: subl %r8d, %esi
+; WIN64-NEXT: leal (%r9,%r11), %r8d
+; WIN64-NEXT: # kill: def $r9d killed $r9d killed $r9
+; WIN64-NEXT: subl %r11d, %r9d
+; WIN64-NEXT: movl %eax, %r11d
+; WIN64-NEXT: subl %ecx, %r11d
+; WIN64-NEXT: imull %r11d, %r9d
+; WIN64-NEXT: leal (%r12,%r14), %r11d
+; WIN64-NEXT: # kill: def $r12d killed $r12d killed $r12
+; WIN64-NEXT: subl %r14d, %r12d
+; WIN64-NEXT: imull %edx, %r12d
+; WIN64-NEXT: movl {{[0-9]+}}(%rsp), %edx
+; WIN64-NEXT: addl %r9d, %r12d
+; WIN64-NEXT: movl %r15d, %r9d
+; WIN64-NEXT: subl %edx, %r9d
+; WIN64-NEXT: imull %esi, %r9d
+; WIN64-NEXT: addl %r12d, %r9d
+; WIN64-NEXT: addl %ecx, %eax
+; WIN64-NEXT: imull %r8d, %eax
+; WIN64-NEXT: imull %ebx, %r11d
+; WIN64-NEXT: addl %r11d, %eax
+; WIN64-NEXT: addl %r15d, %edx
+; WIN64-NEXT: imull %edi, %edx
+; WIN64-NEXT: addl %edx, %eax
+; WIN64-NEXT: addl %r9d, %eax
+; WIN64-NEXT: popq %rbx
+; WIN64-NEXT: retq
+;
+; LINUXOSX-LABEL: testi32_inp:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: # kill: def $edx killed $edx def $rdx
+; LINUXOSX-NEXT: # kill: def $esi killed $esi def $rsi
+; LINUXOSX-NEXT: # kill: def $r14d killed $r14d def $r14
+; LINUXOSX-NEXT: # kill: def $r13d killed $r13d def $r13
+; LINUXOSX-NEXT: # kill: def $r12d killed $r12d def $r12
+; LINUXOSX-NEXT: # kill: def $r9d killed $r9d def $r9
+; LINUXOSX-NEXT: # kill: def $r8d killed $r8d def $r8
+; LINUXOSX-NEXT: # kill: def $edi killed $edi def $rdi
+; LINUXOSX-NEXT: leal (%rdx,%rdi), %r10d
+; LINUXOSX-NEXT: # kill: def $edx killed $edx killed $rdx
+; LINUXOSX-NEXT: subl %edi, %edx
+; LINUXOSX-NEXT: leal (%rsi,%r8), %edi
+; LINUXOSX-NEXT: # kill: def $esi killed $esi killed $rsi
+; LINUXOSX-NEXT: subl %r8d, %esi
+; LINUXOSX-NEXT: leal (%r9,%r12), %r8d
+; LINUXOSX-NEXT: # kill: def $r9d killed $r9d killed $r9
+; LINUXOSX-NEXT: subl %r12d, %r9d
+; LINUXOSX-NEXT: movl %eax, %r11d
+; LINUXOSX-NEXT: subl %ecx, %r11d
+; LINUXOSX-NEXT: imull %r11d, %r9d
+; LINUXOSX-NEXT: leal (%r13,%r14), %r11d
+; LINUXOSX-NEXT: movl %r13d, %r12d
+; LINUXOSX-NEXT: subl %r14d, %r12d
+; LINUXOSX-NEXT: imull %edx, %r12d
+; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %edx
+; LINUXOSX-NEXT: addl %r9d, %r12d
+; LINUXOSX-NEXT: movl %r15d, %r9d
+; LINUXOSX-NEXT: subl %edx, %r9d
+; LINUXOSX-NEXT: imull %esi, %r9d
+; LINUXOSX-NEXT: addl %r12d, %r9d
+; LINUXOSX-NEXT: addl %ecx, %eax
+; LINUXOSX-NEXT: imull %r8d, %eax
+; LINUXOSX-NEXT: imull %r10d, %r11d
+; LINUXOSX-NEXT: addl %r11d, %eax
+; LINUXOSX-NEXT: addl %r15d, %edx
+; LINUXOSX-NEXT: imull %edi, %edx
+; LINUXOSX-NEXT: addl %edx, %eax
+; LINUXOSX-NEXT: addl %r9d, %eax
+; LINUXOSX-NEXT: retq
+ i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
+ %x1 = sub i32 %a1, %a2
+ %x2 = sub i32 %a3, %a4
+ %x3 = sub i32 %a5, %a6
+ %y1 = sub i32 %b1, %b2
+ %y2 = sub i32 %b3, %b4
+ %y3 = sub i32 %b5, %b6
+ %v1 = add i32 %a1, %a2
+ %v2 = add i32 %a3, %a4
+ %v3 = add i32 %a5, %a6
+ %w1 = add i32 %b1, %b2
+ %w2 = add i32 %b3, %b4
+ %w3 = add i32 %b5, %b6
+ %s1 = mul i32 %x1, %y1
+ %s2 = mul i32 %x2, %y2
+ %s3 = mul i32 %x3, %y3
+ %t1 = mul i32 %v1, %w1
+ %t2 = mul i32 %v2, %w2
+ %t3 = mul i32 %v3, %w3
+ %m1 = add i32 %s1, %s2
+ %m2 = add i32 %m1, %s3
+ %n1 = add i32 %t1, %t2
+ %n2 = add i32 %n1, %t3
+ %r1 = add i32 %m2, %n2
+ ret i32 %r1
+}
+
+; Test that parameters, overflowing register capacity, are passed through the stack
+define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
+; WIN32-LABEL: testf32_stack:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: movl %esp, %ebp
+; WIN32-NEXT: andl $-16, %esp
+; WIN32-NEXT: subl $48, %esp
+; WIN32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; WIN32-NEXT: movaps %xmm6, (%esp) # 16-byte Spill
+; WIN32-NEXT: movaps %xmm5, %xmm6
+; WIN32-NEXT: movaps %xmm4, %xmm5
+; WIN32-NEXT: movaps %xmm3, %xmm4
+; WIN32-NEXT: movaps %xmm2, %xmm3
+; WIN32-NEXT: movaps %xmm1, %xmm2
+; WIN32-NEXT: movaps %xmm0, %xmm1
+; WIN32-NEXT: movups 120(%ebp), %xmm7
+; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; WIN32-NEXT: addps %xmm7, %xmm0
+; WIN32-NEXT: movups 248(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm0
+; WIN32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; WIN32-NEXT: movups 104(%ebp), %xmm7
+; WIN32-NEXT: movaps (%esp), %xmm0 # 16-byte Reload
+; WIN32-NEXT: addps %xmm7, %xmm0
+; WIN32-NEXT: movups 232(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm0
+; WIN32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill
+; WIN32-NEXT: movups 88(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm6
+; WIN32-NEXT: movups 216(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm6
+; WIN32-NEXT: movups 72(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm5
+; WIN32-NEXT: movups 200(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm5
+; WIN32-NEXT: movups 56(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm4
+; WIN32-NEXT: movups 184(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm4
+; WIN32-NEXT: movups 40(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm3
+; WIN32-NEXT: movups 168(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm3
+; WIN32-NEXT: movups 24(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm2
+; WIN32-NEXT: movups 152(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm2
+; WIN32-NEXT: movups 8(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm1
+; WIN32-NEXT: movups 136(%ebp), %xmm7
+; WIN32-NEXT: addps %xmm7, %xmm1
+; WIN32-NEXT: movaps %xmm1, %xmm0
+; WIN32-NEXT: movaps %xmm2, %xmm1
+; WIN32-NEXT: movaps %xmm3, %xmm2
+; WIN32-NEXT: movaps %xmm4, %xmm3
+; WIN32-NEXT: movaps %xmm5, %xmm4
+; WIN32-NEXT: movaps %xmm6, %xmm5
+; WIN32-NEXT: movaps (%esp), %xmm6 # 16-byte Reload
+; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN32-NEXT: movl %ebp, %esp
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+;
+; WIN64-LABEL: testf32_stack:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rax
+; WIN64-NEXT: addps %xmm15, %xmm7
+; WIN64-NEXT: addps %xmm14, %xmm6
+; WIN64-NEXT: addps %xmm13, %xmm5
+; WIN64-NEXT: addps %xmm12, %xmm4
+; WIN64-NEXT: addps %xmm11, %xmm3
+; WIN64-NEXT: addps %xmm10, %xmm2
+; WIN64-NEXT: addps %xmm9, %xmm1
+; WIN64-NEXT: addps %xmm8, %xmm0
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm4
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm5
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm6
+; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm7
+; WIN64-NEXT: popq %rax
+; WIN64-NEXT: retq
+;
+; LINUXOSX-LABEL: testf32_stack:
+; LINUXOSX: # %bb.0:
+; LINUXOSX-NEXT: addps %xmm15, %xmm7
+; LINUXOSX-NEXT: addps %xmm14, %xmm6
+; LINUXOSX-NEXT: addps %xmm13, %xmm5
+; LINUXOSX-NEXT: addps %xmm12, %xmm4
+; LINUXOSX-NEXT: addps %xmm11, %xmm3
+; LINUXOSX-NEXT: addps %xmm10, %xmm2
+; LINUXOSX-NEXT: addps %xmm9, %xmm1
+; LINUXOSX-NEXT: addps %xmm8, %xmm0
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm4
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm5
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm6
+; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm7
+; LINUXOSX-NEXT: retq
+ %x1 = fadd <32 x float> %a, %b
+ %x2 = fadd <32 x float> %x1, %c
+ ret <32 x float> %x2
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"RegCallv4", i32 1}
More information about the llvm-commits
mailing list