[clang] [llvm] [X86] Reduce -ffixed-r compile-time overhead (PR #184606)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 4 05:20:04 PST 2026
https://github.com/zhouguangyuan0718 created https://github.com/llvm/llvm-project/pull/184606
PR #180242 added reserve-r support across the driver and backend, but it also introduced avoidable compile-time work in hot paths.
In Clang, defer +egpr detection until at least one of -ffixed-r16 through -ffixed-r31 is actually passed on the command line, instead of computing it for every x86_64 invocation.
In LLVM, store X86Subtarget::ReservedRReg in a fixed-size std::bitset and update X86RegisterInfo::getReservedRegs() to iterate only over the reserve-r register ranges (R8-R15 and R16-R31) instead of scanning every target register.
These changes keep reserve-r behavior unchanged while trimming the extra compile-time overhead introduced by PR #180242.
From e5cda84cfd4326fa92208ac5f77a0c2797dbfa30 Mon Sep 17 00:00:00 2001
From: ZhouGuangyuan <zhouguangyuan.xian at gmail.com>
Date: Wed, 4 Mar 2026 21:09:07 +0800
Subject: [PATCH] [X86] Reduce -ffixed-r compile-time overhead
PR #180242 added reserve-r support across the driver and backend, but it also introduced avoidable compile-time work in hot paths.
In Clang, defer +egpr detection until at least one of -ffixed-r16 through -ffixed-r31 is actually passed on the command line, instead of computing it for every x86_64 invocation.
In LLVM, store X86Subtarget::ReservedRReg in a fixed-size std::bitset and update X86RegisterInfo::getReservedRegs() to iterate only over the reserve-r register ranges (R8-R15 and R16-R31) instead of scanning every target register.
These changes keep reserve-r behavior unchanged while trimming the extra compile-time overhead introduced by PR #180242.
---
clang/lib/Driver/ToolChains/Arch/X86.cpp | 30 ++++++++++++++----------
llvm/lib/Target/X86/X86RegisterInfo.cpp | 12 ++++++----
llvm/lib/Target/X86/X86Subtarget.cpp | 3 +--
llvm/lib/Target/X86/X86Subtarget.h | 4 ++--
4 files changed, 28 insertions(+), 21 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index 6a71512193800..a77f0c33bf7c4 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -151,16 +151,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
}
const llvm::Triple::ArchType ArchType = Triple.getArch();
- bool HasEGPR = false;
-
- // -ffixed-r16 through -ffixed-r31 are only valid when the selected x86_64
- // CPU enables APX EGPR by default; later -target-feature arguments still get
- // their own validation when translated to backend features.
- if (ArchType == llvm::Triple::x86_64) {
- SmallVector<StringRef, 16> CPUFeatures;
- llvm::X86::getFeaturesForCPU(getX86TargetCPU(D, Args, Triple), CPUFeatures);
- HasEGPR = llvm::is_contained(CPUFeatures, "+egpr");
- }
+ std::optional<bool> HasEGPR;
// Add features to be compatible with gcc for Android.
if (Triple.isAndroid()) {
@@ -366,9 +357,25 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
RESERVE_REG(r13)
RESERVE_REG(r14)
RESERVE_REG(r15)
+#undef RESERVE_REG
+
+ bool NeedDetectEGPR =
+ Args.hasArg(options::OPT_ffixed_r16, options::OPT_ffixed_r17,
+ options::OPT_ffixed_r18, options::OPT_ffixed_r19,
+ options::OPT_ffixed_r20, options::OPT_ffixed_r21,
+ options::OPT_ffixed_r22, options::OPT_ffixed_r23,
+ options::OPT_ffixed_r24, options::OPT_ffixed_r25,
+ options::OPT_ffixed_r26, options::OPT_ffixed_r27,
+ options::OPT_ffixed_r28, options::OPT_ffixed_r29,
+ options::OPT_ffixed_r30, options::OPT_ffixed_r31);
+ if (NeedDetectEGPR && !HasEGPR && ArchType == llvm::Triple::x86_64) {
+ SmallVector<StringRef, 16> CPUFeatures;
+ llvm::X86::getFeaturesForCPU(getX86TargetCPU(D, Args, Triple), CPUFeatures);
+ HasEGPR = llvm::is_contained(CPUFeatures, "+egpr");
+ }
#define RESERVE_EGPR(REG) \
if (Args.hasArg(options::OPT_ffixed_##REG)) { \
- if (!HasEGPR) \
+ if (!HasEGPR.value_or(false)) \
D.Diag(diag::err_drv_unsupported_opt_for_target) \
<< "-ffixed-" #REG << Triple.getTriple(); \
else \
@@ -391,5 +398,4 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
RESERVE_EGPR(r30)
RESERVE_EGPR(r31)
#undef RESERVE_EGPR
-#undef RESERVE_REG
}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 42d1bedb350f6..365ebd7e982d6 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -517,13 +517,15 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
auto &ST = MF.getSubtarget<X86Subtarget>();
if (ST.is64Bit()) {
- for (size_t Reg = 0; Reg < getNumRegs(); Reg++) {
- // Set r# as reserved register if user required
- if (ST.isRegisterReservedByUser(Reg)) {
+ // Set r# as reserved register if user required
+ for (unsigned Reg = X86::R8; Reg <= X86::R15; ++Reg)
+ if (ST.isRegisterReservedByUser(Reg))
+ for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
+ Reserved.set(SubReg);
+ for (unsigned Reg = X86::R16; Reg <= X86::R31; ++Reg)
+ if (ST.isRegisterReservedByUser(Reg))
for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
Reserved.set(SubReg);
- }
- }
}
// Set the instruction pointer register and its aliases as reserved.
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index f73ca37a45c07..4e2e98410f325 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -317,8 +317,7 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth)
: X86GenSubtargetInfo(TT, CPU, TuneCPU, FS),
- PICStyle(PICStyles::Style::None), TM(TM),
- ReservedRReg(X86::NUM_TARGET_REGS), TargetTriple(TT),
+ PICStyle(PICStyles::Style::None), TM(TM), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
PreferVectorWidthOverride(PreferVectorWidthOverride),
RequiredVectorWidth(RequiredVectorWidth),
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index dd9a5de3030ed..689ff221ffaf1 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -17,10 +17,10 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/TargetParser/Triple.h"
+#include <bitset>
#include <climits>
#include <memory>
@@ -67,7 +67,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool ATTRIBUTE = DEFAULT;
#include "X86GenSubtargetInfo.inc"
/// ReservedRReg R#i is not available as a general purpose register.
- BitVector ReservedRReg;
+ std::bitset<X86::NUM_TARGET_REGS> ReservedRReg;
/// The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
More information about the cfe-commits mailing list