[llvm] Add SVML calling convention (PR #67884)
Yevhenii Havrylko via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 30 06:55:01 PDT 2023
https://github.com/ZzEeKkAa created https://github.com/llvm/llvm-project/pull/67884
Intel SVML (Short Vector Math Library) requires custom calling conventions, which are different from system ones (especially on Windows). This patch adds SVML-specific calling conventions to LLVM, including enums, parsing/printing and tablegen. The actual usage in vectorizer will be done in follow up patches.
>From e40e69dc1b8a78bc85cf7c1892e45b57d237d552 Mon Sep 17 00:00:00 2001
From: Yevhenii Havrylko <yevhenii.havrylko at intel.com>
Date: Fri, 29 Sep 2023 15:08:05 -0400
Subject: [PATCH] Add SVML calling convention
---
llvm/include/llvm/AsmParser/LLToken.h | 3 +
llvm/include/llvm/IR/CallingConv.h | 5 ++
llvm/lib/AsmParser/LLLexer.cpp | 3 +
llvm/lib/AsmParser/LLParser.cpp | 6 ++
llvm/lib/IR/AsmWriter.cpp | 3 +
llvm/lib/IR/Verifier.cpp | 3 +
llvm/lib/Target/X86/X86CallingConv.td | 70 +++++++++++++++++++++
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 3 +-
llvm/lib/Target/X86/X86RegisterInfo.cpp | 44 +++++++++++++
llvm/lib/Target/X86/X86Subtarget.h | 3 +
llvm/test/Feature/callingconventions.ll | 12 ++++
11 files changed, 154 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 673dc58ce6451e3..5c96aefd1d74a9c 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -130,6 +130,9 @@ enum Kind {
kw_fastcc,
kw_coldcc,
kw_intel_ocl_bicc,
+ kw_intel_svmlcc128,
+ kw_intel_svmlcc256,
+ kw_intel_svmlcc512,
kw_cfguard_checkcc,
kw_x86_stdcallcc,
kw_x86_fastcallcc,
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index e97623b29f5230d..eb069c5335b8b57 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -245,6 +245,11 @@ namespace CallingConv {
/// placement. Preserves active lane values for input VGPRs.
AMDGPU_CS_ChainPreserve = 105,
+ /// Intel_SVML - Calling conventions for Intel Short Vector Math Library
+ Intel_SVML128 = 106,
+ Intel_SVML256 = 107,
+ Intel_SVML512 = 108,
+
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 466bdebc001f589..00fda59a46164ed 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -606,6 +606,9 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(spir_kernel);
KEYWORD(spir_func);
KEYWORD(intel_ocl_bicc);
+ KEYWORD(intel_svmlcc128);
+ KEYWORD(intel_svmlcc256);
+ KEYWORD(intel_svmlcc512);
KEYWORD(x86_64_sysvcc);
KEYWORD(win64cc);
KEYWORD(x86_regcallcc);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 04eabc94cfc6abe..9dec60090513d6d 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1955,6 +1955,9 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'ccc'
/// ::= 'fastcc'
/// ::= 'intel_ocl_bicc'
+/// ::= 'intel_svmlcc128'
+/// ::= 'intel_svmlcc256'
+/// ::= 'intel_svmlcc512'
/// ::= 'coldcc'
/// ::= 'cfguard_checkcc'
/// ::= 'x86_stdcallcc'
@@ -2034,6 +2037,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break;
case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break;
case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
+ case lltok::kw_intel_svmlcc128:CC = CallingConv::Intel_SVML128; break;
+ case lltok::kw_intel_svmlcc256:CC = CallingConv::Intel_SVML256; break;
+ case lltok::kw_intel_svmlcc512:CC = CallingConv::Intel_SVML512; break;
case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break;
case lltok::kw_win64cc: CC = CallingConv::Win64; break;
case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break;
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index e190d82127908db..7680986857a0280 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -304,6 +304,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break;
case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
+ case CallingConv::Intel_SVML128: Out << "intel_svmlcc128"; break;
+ case CallingConv::Intel_SVML256: Out << "intel_svmlcc256"; break;
+ case CallingConv::Intel_SVML512: Out << "intel_svmlcc512"; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 5a3328416db3eb0..fb241de0740fa76 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2640,6 +2640,9 @@ void Verifier::visitFunction(const Function &F) {
case CallingConv::Fast:
case CallingConv::Cold:
case CallingConv::Intel_OCL_BI:
+ case CallingConv::Intel_SVML128:
+ case CallingConv::Intel_SVML256:
+ case CallingConv::Intel_SVML512:
case CallingConv::PTX_Kernel:
case CallingConv::PTX_Device:
Check(!F.isVarArg(),
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 19a295cd109627e..ab7f6728f8e2e31 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -525,6 +525,21 @@ def RetCC_X86_64 : CallingConv<[
CCDelegateTo<RetCC_X86_64_C>
]>;
+// Intel_SVML return-value convention.
+def RetCC_Intel_SVML : CallingConv<[
+ // 128-bit FP vectors are returned in XMM0, XMM1
+ CCIfType<[v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1]>>,
+
+ // 256-bit FP vectors
+ CCIfType<[v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1]>>,
+
+ // 512-bit FP vectors
+ CCIfType<[v16f32, v8f64],
+ CCAssignToReg<[ZMM0,ZMM1]>>
+]>;
+
// This is the return-value convention used for the entire X86 backend.
let Entry = 1 in
def RetCC_X86 : CallingConv<[
@@ -532,6 +547,10 @@ def RetCC_X86 : CallingConv<[
// Check if this is the Intel OpenCL built-ins calling convention
CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
+ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo<RetCC_Intel_SVML>>,
+ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo<RetCC_Intel_SVML>>,
+ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo<RetCC_Intel_SVML>>,
+
CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
CCDelegateTo<RetCC_X86_32>
]>;
@@ -1082,6 +1101,30 @@ def CC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<CC_X86_32_C>
]>;
+// X86-64 Intel Short Vector Math Library calling convention.
+def CC_Intel_SVML : CallingConv<[
+
+ // The SSE vector arguments are passed in XMM registers.
+ CCIfType<[v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2]>>,
+
+ // The 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2]>>,
+
+ // The 512-bit vector arguments are passed in ZMM registers.
+ CCIfType<[v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>
+]>;
+
+def CC_X86_32_Intr : CallingConv<[
+ CCAssignToStack<4, 4>
+]>;
+
+def CC_X86_64_Intr : CallingConv<[
+ CCAssignToStack<8, 8>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
@@ -1135,6 +1178,9 @@ def CC_X86_64 : CallingConv<[
let Entry = 1 in
def CC_X86 : CallingConv<[
CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
+ CCIfCC<"CallingConv::Intel_SVML128", CCDelegateTo<CC_Intel_SVML>>,
+ CCIfCC<"CallingConv::Intel_SVML256", CCDelegateTo<CC_Intel_SVML>>,
+ CCIfCC<"CallingConv::Intel_SVML512", CCDelegateTo<CC_Intel_SVML>>,
CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
CCDelegateTo<CC_X86_32>
]>;
@@ -1244,3 +1290,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
(sequence "R%u", 12, 15))>;
def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
(sequence "XMM%u", 8, 15))>;
+
+// SVML calling convention
+def CSR_32_Intel_SVML : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>;
+def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML,
+ K4, K5, K6, K7)>;
+
+def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>;
+
+def CSR_64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+ (sequence "XMM%u", 8, 15))>;
+def CSR_Win64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+ (sequence "XMM%u", 6, 15))>;
+
+def CSR_64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+ (sequence "YMM%u", 8, 15))>;
+def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+ (sequence "YMM%u", 6, 15))>;
+
+def CSR_64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+ (sequence "ZMM%u", 16, 31),
+ K4, K5, K6, K7)>;
+def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+ (sequence "ZMM%u", 6, 21),
+ K4, K5, K6, K7)>;
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index c47ddae072b4fe8..06b0f182e13f6b6 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -1648,7 +1648,8 @@ void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
// FIXME: Only some x86_32 calling conventions support AVX512.
if (Subtarget.useAVX512Regs() &&
(is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
- CallConv == CallingConv::Intel_OCL_BI)))
+ CallConv == CallingConv::Intel_OCL_BI ||
+ CallConv == CallingConv::Intel_SVML512)))
VecVT = MVT::v16f32;
else if (Subtarget.hasAVX())
VecVT = MVT::v8f32;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 3504ca2b5743f88..1f59205935142eb 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -274,6 +274,40 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
+static std::pair<const uint32_t *, const MCPhysReg *> getSVMLRegMaskAndSaveList(
+ bool Is64Bit, bool IsWin64, CallingConv::ID CC) {
+ assert(CC >= CallingConv::Intel_SVML128 && CC <= CallingConv::Intel_SVML512);
+ unsigned Abi = CC - CallingConv::Intel_SVML128; // 0 - 128, 1 - 256, 2 - 512
+
+ const std::pair<const uint32_t *, const MCPhysReg *> Abi64[] = {
+ std::make_pair(CSR_64_Intel_SVML_RegMask, CSR_64_Intel_SVML_SaveList),
+ std::make_pair(CSR_64_Intel_SVML_AVX_RegMask, CSR_64_Intel_SVML_AVX_SaveList),
+ std::make_pair(CSR_64_Intel_SVML_AVX512_RegMask, CSR_64_Intel_SVML_AVX512_SaveList),
+ };
+
+ const std::pair<const uint32_t *, const MCPhysReg *> AbiWin64[] = {
+ std::make_pair(CSR_Win64_Intel_SVML_RegMask, CSR_Win64_Intel_SVML_SaveList),
+ std::make_pair(CSR_Win64_Intel_SVML_AVX_RegMask, CSR_Win64_Intel_SVML_AVX_SaveList),
+ std::make_pair(CSR_Win64_Intel_SVML_AVX512_RegMask, CSR_Win64_Intel_SVML_AVX512_SaveList),
+ };
+
+ const std::pair<const uint32_t *, const MCPhysReg *> Abi32[] = {
+ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList),
+ std::make_pair(CSR_32_Intel_SVML_RegMask, CSR_32_Intel_SVML_SaveList),
+ std::make_pair(CSR_32_Intel_SVML_AVX512_RegMask, CSR_32_Intel_SVML_AVX512_SaveList),
+ };
+
+ if (Is64Bit) {
+ if (IsWin64) {
+ return AbiWin64[Abi];
+ } else {
+ return Abi64[Abi];
+ }
+ } else {
+ return Abi32[Abi];
+ }
+}
+
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "MachineFunction required");
@@ -329,6 +363,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_Intel_OCL_BI_SaveList;
break;
}
+ case CallingConv::Intel_SVML128:
+ case CallingConv::Intel_SVML256:
+ case CallingConv::Intel_SVML512: {
+ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).second;
+ }
case CallingConv::X86_RegCall:
if (Is64Bit) {
if (IsWin64) {
@@ -449,6 +488,11 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_Intel_OCL_BI_RegMask;
break;
}
+ case CallingConv::Intel_SVML128:
+ case CallingConv::Intel_SVML256:
+ case CallingConv::Intel_SVML512: {
+ return getSVMLRegMaskAndSaveList(Is64Bit, IsWin64, CC).first;
+ }
case CallingConv::X86_RegCall:
if (Is64Bit) {
if (IsWin64) {
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index a458b5f9ec8fbb9..9d3e4d099cdc9dc 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -360,6 +360,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
case CallingConv::X86_ThisCall:
case CallingConv::X86_VectorCall:
case CallingConv::Intel_OCL_BI:
+ case CallingConv::Intel_SVML128:
+ case CallingConv::Intel_SVML256:
+ case CallingConv::Intel_SVML512:
return isTargetWin64();
// This convention allows using the Win64 convention on other targets.
case CallingConv::Win64:
diff --git a/llvm/test/Feature/callingconventions.ll b/llvm/test/Feature/callingconventions.ll
index 7304cdf92328cda..6145073c85db304 100644
--- a/llvm/test/Feature/callingconventions.ll
+++ b/llvm/test/Feature/callingconventions.ll
@@ -60,3 +60,15 @@ define void @ghc_caller() {
}
declare i32 @__gxx_personality_v0(...)
+
+define intel_svmlcc128 void @svmlcc128() {
+ ret void
+}
+
+define intel_svmlcc256 void @svmlcc256() {
+ ret void
+}
+
+define intel_svmlcc512 void @svmlcc512() {
+ ret void
+}
More information about the llvm-commits
mailing list