[clang] [llvm] [WIP][APX] Allow EGPR registers used as non-volatile registers (PR #173224)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 23 20:28:25 PDT 2026
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/173224
>From 5d765591ccb28b4323a4616e9a63a6795034d603 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Mon, 22 Dec 2025 15:36:20 +0800
Subject: [PATCH 1/3] [WIP][APX] Allow EGPR registers used as non-volatile registers
---
clang/include/clang/Options/Options.td | 2 +
clang/lib/Driver/ToolChains/Arch/X86.cpp | 3 +
clang/lib/Driver/ToolChains/CommonArgs.cpp | 6 ++
llvm/lib/Target/X86/X86CallingConv.td | 17 ++++++
llvm/lib/Target/X86/X86RegisterInfo.cpp | 65 ++++++++++++++++++++--
5 files changed, 89 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 8cd31a3be109a..c98bb969b28d9 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -9330,6 +9330,8 @@ def : CLFlag<"Qscatter-">, Alias<mno_scatter>,
// Non-aliases:
+def _SLASH_apx_features_egprnv : CLCompileJoined<"apx-features=egpr-nv:">,
+ HelpText<"Set number of non-volatile registers of APX features">;
def _SLASH_arch : CLCompileJoined<"arch:">,
HelpText<"Set architecture for code generation">;
def _SLASH_vlen : CLFlag<"vlen">,
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index d6e6657c521f0..e90edd0a203b1 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -277,6 +277,9 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
}
+ if (Args.hasArgNoClaim(options::OPT__SLASH_apx_features_egprnv))
+ Features.push_back("+egpr");
+
// Enable/disable straight line speculation hardening.
if (Arg *A = Args.getLastArg(options::OPT_mharden_sls_EQ)) {
StringRef Scope = A->getValue();
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 882283a99d4f1..ce4a199668b60 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -922,6 +922,12 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
CmdArgs.push_back(IsAux ? "-aux-target-feature" : "-target-feature");
CmdArgs.push_back(Feature.data());
}
+
+ if (Arg *A = Args.getLastArg(options::OPT__SLASH_apx_features_egprnv)) {
+ StringRef Value = A->getValue();
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back(Args.MakeArgString("-apx-egpr-csr=" + Value));
+ }
}
llvm::StringRef tools::getLTOParallelism(const ArgList &Args, const Driver &D) {
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index f020e0b55141c..e62e5292f6374 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -1134,6 +1134,23 @@ def CSR_Win64_NoSSE : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R1
def CSR_Win64 : CalleeSavedRegs<(add CSR_Win64_NoSSE,
(sequence "XMM%u", 6, 15))>;
+def CSR_Win64_EGPR1 : CalleeSavedRegs<(add CSR_Win64, R31)>;
+def CSR_Win64_EGPR2 : CalleeSavedRegs<(add CSR_Win64_EGPR1, R30)>;
+def CSR_Win64_EGPR3 : CalleeSavedRegs<(add CSR_Win64_EGPR2, R29)>;
+def CSR_Win64_EGPR4 : CalleeSavedRegs<(add CSR_Win64_EGPR3, R28)>;
+def CSR_Win64_EGPR5 : CalleeSavedRegs<(add CSR_Win64_EGPR4, R27)>;
+def CSR_Win64_EGPR6 : CalleeSavedRegs<(add CSR_Win64_EGPR5, R26)>;
+def CSR_Win64_EGPR7 : CalleeSavedRegs<(add CSR_Win64_EGPR6, R25)>;
+def CSR_Win64_EGPR8 : CalleeSavedRegs<(add CSR_Win64_EGPR7, R24)>;
+def CSR_Win64_EGPR9 : CalleeSavedRegs<(add CSR_Win64_EGPR8, R23)>;
+def CSR_Win64_EGPR10 : CalleeSavedRegs<(add CSR_Win64_EGPR9, R22)>;
+def CSR_Win64_EGPR11 : CalleeSavedRegs<(add CSR_Win64_EGPR10, R21)>;
+def CSR_Win64_EGPR12 : CalleeSavedRegs<(add CSR_Win64_EGPR11, R20)>;
+def CSR_Win64_EGPR13 : CalleeSavedRegs<(add CSR_Win64_EGPR12, R19)>;
+def CSR_Win64_EGPR14 : CalleeSavedRegs<(add CSR_Win64_EGPR13, R18)>;
+def CSR_Win64_EGPR15 : CalleeSavedRegs<(add CSR_Win64_EGPR14, R17)>;
+def CSR_Win64_EGPR16 : CalleeSavedRegs<(add CSR_Win64_EGPR15, R16)>;
+
def CSR_Win64_SwiftError : CalleeSavedRegs<(sub CSR_Win64, R12)>;
def CSR_Win64_SwiftTail : CalleeSavedRegs<(sub CSR_Win64, R13, R14)>;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 72f38133e21ff..b22e0ab704786 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -52,6 +52,9 @@ static cl::opt<bool>
extern cl::opt<bool> X86EnableAPXForRelocation;
+static cl::opt<unsigned> ApxEgprCSR("apx-egpr-csr", cl::init(0),
+ cl::desc("Set N egpr as callee-saved register for APX calling convention"));
+
X86RegisterInfo::X86RegisterInfo(const Triple &TT)
: X86GenRegisterInfo((TT.isX86_64() ? X86::RIP : X86::EIP),
X86_MC::getDwarfRegFlavour(TT, false),
@@ -245,6 +248,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
bool CallsEHReturn = MF->callsEHReturn();
+ bool HasEGPR = Subtarget.hasEGPR();
CallingConv::ID CC = F.getCallingConv();
@@ -258,6 +262,32 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
return CSR_NoRegs_SaveList;
+ auto CSR_Win64_Or_EGPR_SaveList = [HasEGPR]() {
+ if (!HasEGPR)
+ return CSR_Win64_SaveList;
+
+ switch (ApxEgprCSR) {
+ case 0: return CSR_Win64_SaveList;
+ case 1: return CSR_Win64_EGPR1_SaveList;
+ case 2: return CSR_Win64_EGPR2_SaveList;
+ case 3: return CSR_Win64_EGPR3_SaveList;
+ case 4: return CSR_Win64_EGPR4_SaveList;
+ case 5: return CSR_Win64_EGPR5_SaveList;
+ case 6: return CSR_Win64_EGPR6_SaveList;
+ case 7: return CSR_Win64_EGPR7_SaveList;
+ case 8: return CSR_Win64_EGPR8_SaveList;
+ case 9: return CSR_Win64_EGPR9_SaveList;
+ case 10: return CSR_Win64_EGPR10_SaveList;
+ case 11: return CSR_Win64_EGPR11_SaveList;
+ case 12: return CSR_Win64_EGPR12_SaveList;
+ case 13: return CSR_Win64_EGPR13_SaveList;
+ case 14: return CSR_Win64_EGPR14_SaveList;
+ case 15: return CSR_Win64_EGPR15_SaveList;
+ case 16: return CSR_Win64_EGPR16_SaveList;
+ default: llvm_unreachable("Invalid reg number!");
+ }
+ };
+
switch (CC) {
case CallingConv::GHC:
case CallingConv::HiPE:
@@ -317,7 +347,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
case CallingConv::Win64:
if (!HasSSE)
return CSR_Win64_NoSSE_SaveList;
- return CSR_Win64_SaveList;
+ return CSR_Win64_Or_EGPR_SaveList();
case CallingConv::SwiftTail:
if (!Is64Bit)
return CSR_32_SaveList;
@@ -356,7 +386,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_64_SwiftError_SaveList;
if (IsWin64 || IsUEFI64)
- return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
+ return HasSSE ? CSR_Win64_Or_EGPR_SaveList() : CSR_Win64_NoSSE_SaveList;
if (CallsEHReturn)
return CSR_64EHRet_SaveList;
return CSR_64_SaveList;
@@ -386,6 +416,33 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
bool HasSSE = Subtarget.hasSSE1();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool HasEGPR = Subtarget.hasEGPR();
+
+ auto CSR_Win64_Or_EGPR_RegMask = [HasEGPR]() {
+ if (!HasEGPR)
+ return CSR_Win64_RegMask;
+
+ switch (ApxEgprCSR) {
+ case 0: return CSR_Win64_RegMask;
+ case 1: return CSR_Win64_EGPR1_RegMask;
+ case 2: return CSR_Win64_EGPR2_RegMask;
+ case 3: return CSR_Win64_EGPR3_RegMask;
+ case 4: return CSR_Win64_EGPR4_RegMask;
+ case 5: return CSR_Win64_EGPR5_RegMask;
+ case 6: return CSR_Win64_EGPR6_RegMask;
+ case 7: return CSR_Win64_EGPR7_RegMask;
+ case 8: return CSR_Win64_EGPR8_RegMask;
+ case 9: return CSR_Win64_EGPR9_RegMask;
+ case 10: return CSR_Win64_EGPR10_RegMask;
+ case 11: return CSR_Win64_EGPR11_RegMask;
+ case 12: return CSR_Win64_EGPR12_RegMask;
+ case 13: return CSR_Win64_EGPR13_RegMask;
+ case 14: return CSR_Win64_EGPR14_RegMask;
+ case 15: return CSR_Win64_EGPR15_RegMask;
+ case 16: return CSR_Win64_EGPR16_RegMask;
+ default: llvm_unreachable("Invalid reg number!");
+ }
+ };
switch (CC) {
case CallingConv::GHC:
@@ -442,7 +499,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_MostRegs_RegMask;
break;
case CallingConv::Win64:
- return CSR_Win64_RegMask;
+ return CSR_Win64_Or_EGPR_RegMask();
case CallingConv::SwiftTail:
if (!Is64Bit)
return CSR_32_RegMask;
@@ -480,7 +537,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (IsSwiftCC)
return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;
- return (IsWin64 || IsUEFI64) ? CSR_Win64_RegMask : CSR_64_RegMask;
+ return (IsWin64 || IsUEFI64) ? CSR_Win64_Or_EGPR_RegMask() : CSR_64_RegMask;
}
return CSR_32_RegMask;
>From 5843987c512e32bffd437db2e06eb3c67d6cced8 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Tue, 17 Mar 2026 17:02:48 +0800
Subject: [PATCH 2/3] Adjust register priority
---
llvm/lib/Target/X86/X86RegisterInfo.td | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 692e42ae5e752..dfb5ceae0dae3 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -544,9 +544,9 @@ def SSP : X86Reg<"ssp", 0>;
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
- R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R22B,
- R23B, R24B, R25B, R26B, R27B, R30B, R31B, R14B,
- R15B, R12B, R13B, R20B, R21B, R28B, R29B)> {
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B, R16B,
+ R17B, R18B, R19B, R22B, R23B, R24B, R25B, R26B,
+ R27B, R30B, R31B, R20B, R21B, R28B, R29B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
@@ -561,9 +561,9 @@ def GRH8 : RegisterClass<"X86", [i8], 8,
R26BH, R27BH, R28BH, R29BH, R30BH, R31BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W,
- R11W, R16W, R17W, R18W, R19W, R22W, R23W, R24W,
- R25W, R26W, R27W, R30W, R31W, R14W, R15W, R12W,
- R13W, R20W, R21W, R28W, R29W)>;
+ R11W, R14W, R15W, R12W, R13W, R16W, R17W, R18W,
+ R19W, R22W, R23W, R24W, R25W, R26W, R27W, R30W,
+ R31W, R20W, R21W, R28W, R29W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
@@ -573,9 +573,9 @@ def GRH16 : RegisterClass<"X86", [i16], 16,
R25WH, R26WH, R27WH, R28WH, R29WH, R30WH, R31WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D,
- R10D, R11D, R16D, R17D, R18D, R19D, R22D, R23D,
- R24D, R25D, R26D, R27D, R30D, R31D, R14D, R15D,
- R12D, R13D, R20D, R21D, R28D, R29D)>;
+ R10D, R11D, R14D, R15D, R12D, R13D, R16D, R17D,
+ R18D, R19D, R22D, R23D, R24D, R25D, R26D, R27D,
+ R30D, R31D, R20D, R21D, R28D, R29D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
@@ -583,9 +583,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
- (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R16, R17,
- R18, R19, R22, R23, R24, R25, R26, R27, R30, R31, RBX,
- R14, R15, R12, R13, R20, R21, R28, R29, RBP, RSP, RIP)>;
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14,
+ R15, R12, R13, RBP, R16, R17, R18, R19, R22, R23, R24,
+ R25, R26, R27, R30, R31, R20, R21, R28, R29, RSP, RIP)>;
// GR64PLTSafe - 64-bit GPRs without R10, R11, RSP and RIP. Could be used when
// emitting code for intrinsics, which use implict input registers.
>From f7f431ddbcd4fbab9ceb01fde42c04ceebaecdf1 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Tue, 24 Mar 2026 11:28:00 +0800
Subject: [PATCH 3/3] Apply azwolski's diff
---
clang/include/clang/Options/Options.td | 2 ++
clang/lib/Driver/ToolChains/Arch/X86.cpp | 8 ++++++++
clang/lib/Driver/ToolChains/CommonArgs.cpp | 3 ++-
3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index c98bb969b28d9..022901ec2fe6d 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -6999,6 +6999,8 @@ def mapx_features_EQ : CommaJoined<["-"], "mapx-features=">, Group<m_x86_Feature
HelpText<"Enable features of APX">, Values<"egpr,push2pop2,ppx,ndd,ccmp,nf,cf,zu">, Visibility<[ClangOption, CLOption, FlangOption]>;
def mno_apx_features_EQ : CommaJoined<["-"], "mno-apx-features=">, Group<m_x86_Features_Group>,
HelpText<"Disable features of APX">, Values<"egpr,push2pop2,ppx,ndd,ccmp,nf,cf,zu">, Visibility<[ClangOption, CLOption, FlangOption]>;
+def mapx_features_egpr_nv_EQ : Joined<["-"], "mapx-features=egpr-nv:">, Group<m_x86_Features_Group>,
+ HelpText<"Set number of non-volatile registers of APX EGPR feature">;
def mapxf : Flag<["-"], "mapxf">, Alias<mapx_features_EQ>,
AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf","zu"]>,
Group<m_x86_Features_Group>;
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index e90edd0a203b1..74767a0d6b73e 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -272,11 +272,19 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
}
continue;
}
+
+ // Handle -mapx-features=egpr-nv:N
+ if (A->getOption().matches(options::OPT_mapx_features_egpr_nv_EQ)) {
+ Features.push_back("+egpr");
+ continue;
+ }
+
if (IsNegative)
Name = Name.substr(3);
Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
}
+ // Handle Windows-style /apx-features=egpr-nv:N option
if (Args.hasArgNoClaim(options::OPT__SLASH_apx_features_egprnv))
Features.push_back("+egpr");
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index ce4a199668b60..0f5bcf5f58836 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -923,7 +923,8 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
CmdArgs.push_back(Feature.data());
}
- if (Arg *A = Args.getLastArg(options::OPT__SLASH_apx_features_egprnv)) {
+ if (Arg *A = Args.getLastArg(options::OPT__SLASH_apx_features_egprnv,
+ options::OPT_mapx_features_egpr_nv_EQ)) {
StringRef Value = A->getValue();
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-apx-egpr-csr=" + Value));
More information about the llvm-commits mailing list