[clang] [llvm] [X86] Reduce -ffixed-r compile-time overhead (PR #184606)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 4 08:24:24 PST 2026
https://github.com/zhouguangyuan0718 updated https://github.com/llvm/llvm-project/pull/184606
>From 85d7f73b79397e455102c9d8996e1ac85328ccc3 Mon Sep 17 00:00:00 2001
From: ZhouGuangyuan <zhouguangyuan.xian at gmail.com>
Date: Wed, 4 Mar 2026 21:09:07 +0800
Subject: [PATCH] [X86] Reduce -ffixed-r compile-time overhead
PR #180242 added reserve-r support across the driver and backend, but it also introduced avoidable compile-time work in hot paths.
In Clang, delay +egpr detection until -ffixed-r16 through -ffixed-r31 are actually queried instead of computing it for every x86_64 invocation.
In LLVM, store X86Subtarget::ReservedRReg in a fixed-size std::bitset and update X86RegisterInfo::getReservedRegs() to iterate only over the reserve-r register ranges (R8-R15 and R16-R31), and only when at least one register is actually reserved, instead of scanning every target register.
These changes keep reserve-r behavior unchanged while trimming the extra compile-time overhead introduced by PR #180242.
Signed-off-by: ZhouGuangyuan <zhouguangyuan.xian at gmail.com>
---
clang/lib/Driver/ToolChains/Arch/X86.cpp | 46 ++++++++++++++----------
llvm/lib/Target/X86/X86RegisterInfo.cpp | 14 ++++----
llvm/lib/Target/X86/X86Subtarget.cpp | 3 +-
llvm/lib/Target/X86/X86Subtarget.h | 5 +--
4 files changed, 40 insertions(+), 28 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index 6a71512193800..dc3aba67545dc 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -151,16 +151,6 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
}
const llvm::Triple::ArchType ArchType = Triple.getArch();
- bool HasEGPR = false;
-
- // -ffixed-r16 through -ffixed-r31 are only valid when the selected x86_64
- // CPU enables APX EGPR by default; later -target-feature arguments still get
- // their own validation when translated to backend features.
- if (ArchType == llvm::Triple::x86_64) {
- SmallVector<StringRef, 16> CPUFeatures;
- llvm::X86::getFeaturesForCPU(getX86TargetCPU(D, Args, Triple), CPUFeatures);
- HasEGPR = llvm::is_contained(CPUFeatures, "+egpr");
- }
// Add features to be compatible with gcc for Android.
if (Triple.isAndroid()) {
@@ -241,6 +231,8 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
<< D.getOpts().getOptionName(LVIOpt);
}
+ enum class EGPRFeature { Unknown, Disabled, Enabled };
+ EGPRFeature EGPROpt = EGPRFeature::Unknown;
// Now add any that the user explicitly requested on the command line,
// which may override the defaults.
for (const Arg *A : Args.filtered(options::OPT_m_x86_Features_Group,
@@ -271,13 +263,13 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
if (A->getOption().matches(options::OPT_mapxf) ||
A->getOption().matches(options::OPT_mno_apxf)) {
if (IsNegative) {
- HasEGPR = false;
+ EGPROpt = EGPRFeature::Disabled;
Features.insert(Features.end(),
{"-egpr", "-ndd", "-ccmp", "-nf", "-zu"});
if (!Triple.isOSWindows())
Features.insert(Features.end(), {"-push2pop2", "-ppx"});
} else {
- HasEGPR = true;
+ EGPROpt = EGPRFeature::Enabled;
Features.insert(Features.end(),
{"+egpr", "+ndd", "+ccmp", "+nf", "+zu"});
if (!Triple.isOSWindows())
@@ -303,8 +295,9 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
D.Diag(clang::diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Value;
- if (Value == "egpr")
- HasEGPR = !IsNegative;
+ if (Value == "egpr") {
+ EGPROpt = IsNegative ? EGPRFeature::Disabled : EGPRFeature::Enabled;
+ }
Features.push_back(
Args.MakeArgString((IsNegative ? "-" : "+") + Value));
@@ -312,8 +305,9 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
continue;
}
- if (Name == "egpr")
- HasEGPR = !IsNegative;
+ if (Name == "egpr") {
+ EGPROpt = IsNegative ? EGPRFeature::Disabled : EGPRFeature::Enabled;
+ }
Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
}
@@ -366,9 +360,26 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
RESERVE_REG(r13)
RESERVE_REG(r14)
RESERVE_REG(r15)
+#undef RESERVE_REG
+
+ bool NeedDetectEGPR = Args.hasArg(
+ options::OPT_ffixed_r16, options::OPT_ffixed_r17, options::OPT_ffixed_r18,
+ options::OPT_ffixed_r19, options::OPT_ffixed_r20, options::OPT_ffixed_r21,
+ options::OPT_ffixed_r22, options::OPT_ffixed_r23, options::OPT_ffixed_r24,
+ options::OPT_ffixed_r25, options::OPT_ffixed_r26, options::OPT_ffixed_r27,
+ options::OPT_ffixed_r28, options::OPT_ffixed_r29, options::OPT_ffixed_r30,
+ options::OPT_ffixed_r31);
+ if (NeedDetectEGPR && EGPROpt == EGPRFeature::Unknown &&
+ ArchType == llvm::Triple::x86_64) {
+ SmallVector<StringRef, 16> CPUFeatures;
+ llvm::X86::getFeaturesForCPU(getX86TargetCPU(D, Args, Triple), CPUFeatures);
+ EGPROpt = llvm::is_contained(CPUFeatures, "+egpr")
+ ? EGPRFeature::Enabled
+ : EGPRFeature::Disabled;
+ }
#define RESERVE_EGPR(REG) \
if (Args.hasArg(options::OPT_ffixed_##REG)) { \
- if (!HasEGPR) \
+ if (EGPROpt != EGPRFeature::Enabled) \
D.Diag(diag::err_drv_unsupported_opt_for_target) \
<< "-ffixed-" #REG << Triple.getTriple(); \
else \
@@ -391,5 +402,4 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
RESERVE_EGPR(r30)
RESERVE_EGPR(r31)
#undef RESERVE_EGPR
-#undef RESERVE_REG
}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 42d1bedb350f6..86b9cdc366a85 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -516,14 +516,16 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::SSP);
auto &ST = MF.getSubtarget<X86Subtarget>();
- if (ST.is64Bit()) {
- for (size_t Reg = 0; Reg < getNumRegs(); Reg++) {
- // Set r# as reserved register if user required
- if (ST.isRegisterReservedByUser(Reg)) {
+ if (ST.is64Bit() && ST.hasUserReservedRegisters()) {
+ // Set r# as reserved register if user required
+ for (unsigned Reg = X86::R8; Reg <= X86::R15; ++Reg)
+ if (ST.isRegisterReservedByUser(Reg))
+ for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
+ Reserved.set(SubReg);
+ for (unsigned Reg = X86::R16; Reg <= X86::R31; ++Reg)
+ if (ST.isRegisterReservedByUser(Reg))
for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
Reserved.set(SubReg);
- }
- }
}
// Set the instruction pointer register and its aliases as reserved.
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index f73ca37a45c07..4e2e98410f325 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -317,8 +317,7 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth)
: X86GenSubtargetInfo(TT, CPU, TuneCPU, FS),
- PICStyle(PICStyles::Style::None), TM(TM),
- ReservedRReg(X86::NUM_TARGET_REGS), TargetTriple(TT),
+ PICStyle(PICStyles::Style::None), TM(TM), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
PreferVectorWidthOverride(PreferVectorWidthOverride),
RequiredVectorWidth(RequiredVectorWidth),
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index dd9a5de3030ed..2e3c23eeca35b 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -17,10 +17,10 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/TargetParser/Triple.h"
+#include <bitset>
#include <climits>
#include <memory>
@@ -67,7 +67,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool ATTRIBUTE = DEFAULT;
#include "X86GenSubtargetInfo.inc"
/// ReservedRReg R#i is not available as a general purpose register.
- BitVector ReservedRReg;
+ std::bitset<X86::NUM_TARGET_REGS> ReservedRReg;
/// The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
@@ -162,6 +162,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool isRegisterReservedByUser(Register i) const override {
return ReservedRReg[i.id()];
}
+ bool hasUserReservedRegisters() const { return ReservedRReg.any(); }
private:
/// Initialize the full set of dependencies so we can use an initializer
More information about the cfe-commits
mailing list