[clang] [sanitizer] Parse weighted sanitizer args and -fno-sanitize-top-hot (PR #121619)
Thurston Dang via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 3 19:01:27 PST 2025
https://github.com/thurstond created https://github.com/llvm/llvm-project/pull/121619
This adds a function to parse weighted sanitizer flags (e.g., -fsanitize-blah=undefined=0.5,null=0.3) and adds the plumbing to apply that to -fno-sanitize-top-hot from the frontend to backend.
-fno-sanitize-top-hot currently has no effect; future work will use it to generalize ubsan-guard-checks (originally introduced in 5f9ed2ff8364ff3e4fac410472f421299dafa793).
>From ca1fabc5ea75af0acdd1969c0ad505e04103e1c9 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Sat, 4 Jan 2025 02:53:00 +0000
Subject: [PATCH] [sanitizer] Parse weighted sanitizer args and
-fno-sanitize-top-hot
This adds a function to parse weighted sanitizer flags (e.g.,
-fsanitize-blah=undefined=0.5,null=0.3) and adds the plumbing to
apply that to -fno-sanitize-top-hot from the frontend to backend.
-fno-sanitize-top-hot currently has no effect; future work will
use it to generalize ubsan-guard-checks (originally introduced in 5f9ed2ff8364ff3e4fac410472f421299dafa793).
---
clang/include/clang/Basic/CodeGenOptions.h | 4 ++
clang/include/clang/Basic/Sanitizers.h | 14 +++++
clang/include/clang/Driver/Options.td | 7 +++
clang/include/clang/Driver/SanitizerArgs.h | 1 +
clang/lib/Basic/Sanitizers.cpp | 38 ++++++++++++
clang/lib/Driver/SanitizerArgs.cpp | 69 +++++++++++++++++-----
clang/lib/Frontend/CompilerInvocation.cpp | 5 ++
7 files changed, 124 insertions(+), 14 deletions(-)
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 8097c9ef772bc7..f69f52e49a2fe9 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -384,6 +384,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
/// the expense of debuggability).
SanitizerSet SanitizeMergeHandlers;
+ /// Set of top hotness thresholds, specifying the fraction of code that is
+ /// excluded from sanitization (0 = skip none, 0.1 = skip hottest 10%, 1.0 = skip all).
+ SanitizerMaskWeights NoSanitizeTopHot = {0};
+
/// List of backend command-line options for -fembed-bitcode.
std::vector<uint8_t> CmdArgs;
diff --git a/clang/include/clang/Basic/Sanitizers.h b/clang/include/clang/Basic/Sanitizers.h
index c890242269b334..fa6b557819a1a1 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -154,6 +154,8 @@ struct SanitizerKind {
#include "clang/Basic/Sanitizers.def"
}; // SanitizerKind
+typedef double SanitizerMaskWeights[SanitizerKind::SO_Count];
+
struct SanitizerSet {
/// Check if a certain (single) sanitizer is enabled.
bool has(SanitizerMask K) const {
@@ -186,10 +188,22 @@ struct SanitizerSet {
/// Returns a non-zero SanitizerMask, or \c 0 if \p Value is not known.
SanitizerMask parseSanitizerValue(StringRef Value, bool AllowGroups);
+/// Parse a single weighted value (e.g., 'undefined=0.05') from a -fsanitize= or
+/// -fno-sanitize= value list.
+/// Returns a non-zero SanitizerMask, or \c 0 if \p Value is not known.
+/// The relevant weight(s) are updated in the passed array.
+/// Individual weights are never reset to zero unless explicitly set
+/// (e.g., 'null=0.0').
+SanitizerMask parseSanitizerWeightedValue(StringRef Value, bool AllowGroups, SanitizerMaskWeights Weights);
+
/// Serialize a SanitizerSet into values for -fsanitize= or -fno-sanitize=.
void serializeSanitizerSet(SanitizerSet Set,
SmallVectorImpl<StringRef> &Values);
+/// Serialize a SanitizerMaskWeights into values for -fsanitize= or -fno-sanitize=.
+void serializeSanitizerMaskWeights(const SanitizerMaskWeights Weights,
+ SmallVectorImpl<StringRef> &Values);
+
/// For each sanitizer group bit set in \p Kinds, set the bits for sanitizers
/// this group enables.
SanitizerMask expandSanitizerGroups(SanitizerMask Kinds);
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d922709db17786..631a6099781e6c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2649,6 +2649,13 @@ def fsanitize_undefined_strip_path_components_EQ : Joined<["-"], "fsanitize-unde
HelpText<"Strip (or keep only, if negative) a given number of path components "
"when emitting check metadata.">,
MarshallingInfoInt<CodeGenOpts<"EmitCheckPathComponentsToStrip">, "0", "int">;
+def fno_sanitize_top_hot_EQ
+ : CommaJoined<["-"], "fno-sanitize-top-hot=">,
+ Group<f_clang_Group>,
+ HelpText<"Skip sanitization for the fraction of top hottest code "
+ "(0.0 [default] = do not skip any sanitization; "
+ "0.1 = skip the hottest 10% of code; "
+ "1.0 = skip all sanitization)">;
} // end -f[no-]sanitize* flags
diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h
index 3b275092bbbe86..854893269e8543 100644
--- a/clang/include/clang/Driver/SanitizerArgs.h
+++ b/clang/include/clang/Driver/SanitizerArgs.h
@@ -26,6 +26,7 @@ class SanitizerArgs {
SanitizerSet RecoverableSanitizers;
SanitizerSet TrapSanitizers;
SanitizerSet MergeHandlers;
+ SanitizerMaskWeights TopHot = {0};
std::vector<std::string> UserIgnorelistFiles;
std::vector<std::string> SystemIgnorelistFiles;
diff --git a/clang/lib/Basic/Sanitizers.cpp b/clang/lib/Basic/Sanitizers.cpp
index 62ccdf8e9bbf28..adfab2d3afab01 100644
--- a/clang/lib/Basic/Sanitizers.cpp
+++ b/clang/lib/Basic/Sanitizers.cpp
@@ -36,6 +36,36 @@ SanitizerMask clang::parseSanitizerValue(StringRef Value, bool AllowGroups) {
return ParsedKind;
}
+SanitizerMask clang::parseSanitizerWeightedValue(StringRef Value, bool AllowGroups, SanitizerMaskWeights Weights) {
+ SanitizerMask ParsedKind = llvm::StringSwitch<SanitizerMask>(Value)
+#define SANITIZER(NAME, ID) .StartsWith(NAME"=", SanitizerKind::ID)
+#define SANITIZER_GROUP(NAME, ID, ALIAS) \
+ .StartsWith(NAME"=", AllowGroups ? SanitizerKind::ID##Group : SanitizerMask())
+#include "clang/Basic/Sanitizers.def"
+ .Default(SanitizerMask());
+
+ if (ParsedKind && Weights) {
+ size_t equalsIndex = Value.find_first_of('=');
+ if (equalsIndex != llvm::StringLiteral::npos) {
+ double arg;
+ if ( (Value.size() > (equalsIndex + 1))
+ && !Value.substr(equalsIndex + 1).getAsDouble(arg)) {
+ // AllowGroups is already taken into account for ParsedKind,
+ // hence we unconditionally expandSanitizerGroups.
+ SanitizerMask ExpandedKind = expandSanitizerGroups(ParsedKind);
+
+ for (unsigned int i = 0; i < SanitizerKind::SO_Count; i++) {
+ if(ExpandedKind & SanitizerMask::bitPosToMask(i)) {
+ Weights[i] = arg;
+ }
+ }
+ }
+ }
+ }
+
+ return ParsedKind;
+}
+
void clang::serializeSanitizerSet(SanitizerSet Set,
SmallVectorImpl<StringRef> &Values) {
#define SANITIZER(NAME, ID) \
@@ -44,6 +74,14 @@ void clang::serializeSanitizerSet(SanitizerSet Set,
#include "clang/Basic/Sanitizers.def"
}
+void clang::serializeSanitizerMaskWeights(const SanitizerMaskWeights Weights,
+ SmallVectorImpl<StringRef> &Values) {
+#define SANITIZER(NAME, ID) \
+ if (Weights[SanitizerKind::SO_##ID]) \
+ Values.push_back(std::string(NAME) + "=" + std::to_string(Weights[SanitizerKind::SO_##ID]));
+#include "clang/Basic/Sanitizers.def"
+}
+
SanitizerMask clang::expandSanitizerGroups(SanitizerMask Kinds) {
#define SANITIZER(NAME, ID)
#define SANITIZER_GROUP(NAME, ID, ALIAS) \
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 98116e2c8336b8..0f500ca14c527b 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -111,7 +111,7 @@ enum BinaryMetadataFeature {
/// Parse a -fsanitize= or -fno-sanitize= argument's values, diagnosing any
/// invalid components. Returns a SanitizerMask.
static SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
- bool DiagnoseErrors);
+ bool DiagnoseErrors, SanitizerMaskWeights Weights);
/// Parse -f(no-)?sanitize-coverage= flag values, diagnosing any invalid
/// components. Returns OR of members of \c CoverageFeature enumeration.
@@ -260,7 +260,7 @@ static SanitizerMask
parseSanitizeArgs(const Driver &D, const llvm::opt::ArgList &Args,
bool DiagnoseErrors, SanitizerMask Default,
SanitizerMask AlwaysIn, SanitizerMask AlwaysOut, int OptInID,
- int OptOutID) {
+ int OptOutID, SanitizerMaskWeights Weights) {
assert(!(AlwaysIn & AlwaysOut) &&
"parseSanitizeArgs called with contradictory in/out requirements");
@@ -271,7 +271,7 @@ parseSanitizeArgs(const Driver &D, const llvm::opt::ArgList &Args,
SanitizerMask DiagnosedAlwaysOutViolations;
for (const auto *Arg : Args) {
if (Arg->getOption().matches(OptInID)) {
- SanitizerMask Add = parseArgValues(D, Arg, DiagnoseErrors);
+ SanitizerMask Add = parseArgValues(D, Arg, DiagnoseErrors, Weights);
// Report error if user explicitly tries to opt-in to an always-out
// sanitizer.
if (SanitizerMask KindsToDiagnose =
@@ -287,7 +287,7 @@ parseSanitizeArgs(const Driver &D, const llvm::opt::ArgList &Args,
Output |= expandSanitizerGroups(Add);
Arg->claim();
} else if (Arg->getOption().matches(OptOutID)) {
- SanitizerMask Remove = parseArgValues(D, Arg, DiagnoseErrors);
+ SanitizerMask Remove = parseArgValues(D, Arg, DiagnoseErrors, Weights);
// Report error if user explicitly tries to opt-out of an always-in
// sanitizer.
if (SanitizerMask KindsToDiagnose =
@@ -320,7 +320,15 @@ static SanitizerMask parseSanitizeTrapArgs(const Driver &D,
// (not even in recover mode) in order to avoid the need for a ubsan runtime.
return parseSanitizeArgs(D, Args, DiagnoseErrors, TrappingDefault, AlwaysTrap,
NeverTrap, options::OPT_fsanitize_trap_EQ,
- options::OPT_fno_sanitize_trap_EQ);
+ options::OPT_fno_sanitize_trap_EQ, nullptr);
+}
+
+static SanitizerMask parseNoSanitizeHotArgs(const Driver &D,
+ const llvm::opt::ArgList &Args,
+ bool DiagnoseErrors,
+ SanitizerMaskWeights Weights) {
+ return parseSanitizeArgs(D, Args, DiagnoseErrors, {}, {}, {},
+ options::OPT_fno_sanitize_top_hot_EQ, -1, Weights);
}
bool SanitizerArgs::needsFuzzerInterceptors() const {
@@ -403,7 +411,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
for (const llvm::opt::Arg *Arg : llvm::reverse(Args)) {
if (Arg->getOption().matches(options::OPT_fsanitize_EQ)) {
Arg->claim();
- SanitizerMask Add = parseArgValues(D, Arg, DiagnoseErrors);
+ SanitizerMask Add = parseArgValues(D, Arg, DiagnoseErrors, nullptr);
if (RemoveObjectSizeAtO0) {
AllRemove |= SanitizerKind::ObjectSize;
@@ -573,7 +581,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
Kinds |= Add;
} else if (Arg->getOption().matches(options::OPT_fno_sanitize_EQ)) {
Arg->claim();
- SanitizerMask Remove = parseArgValues(D, Arg, DiagnoseErrors);
+ SanitizerMask Remove = parseArgValues(D, Arg, DiagnoseErrors, nullptr);
AllRemove |= expandSanitizerGroups(Remove);
}
}
@@ -698,7 +706,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
SanitizerMask RecoverableKinds = parseSanitizeArgs(
D, Args, DiagnoseErrors, RecoverableByDefault, AlwaysRecoverable,
Unrecoverable, options::OPT_fsanitize_recover_EQ,
- options::OPT_fno_sanitize_recover_EQ);
+ options::OPT_fno_sanitize_recover_EQ, nullptr);
RecoverableKinds |= AlwaysRecoverable;
RecoverableKinds &= ~Unrecoverable;
RecoverableKinds &= Kinds;
@@ -710,9 +718,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
SanitizerMask MergeKinds =
parseSanitizeArgs(D, Args, DiagnoseErrors, MergeDefault, {}, {},
options::OPT_fsanitize_merge_handlers_EQ,
- options::OPT_fno_sanitize_merge_handlers_EQ);
+ options::OPT_fno_sanitize_merge_handlers_EQ, nullptr);
MergeKinds &= Kinds;
+ // Parse -fno-sanitize-top-hot flags
+ SanitizerMask HotMask = parseNoSanitizeHotArgs (D, Args, DiagnoseErrors, TopHot);
+ (void)HotMask;
+
// Setup ignorelist files.
// Add default ignorelist from resource directory for activated sanitizers,
// and validate special case lists format.
@@ -1132,6 +1144,12 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
"Overlap between recoverable and trapping sanitizers");
MergeHandlers.Mask |= MergeKinds;
+
+ // Zero out TopHot for unused sanitizers
+ for (unsigned int i = 0; i < SanitizerKind::SO_Count; i++) {
+ if (!(Sanitizers.Mask & SanitizerMask::bitPosToMask(i)))
+ TopHot[i] = 0;
+ }
}
static std::string toString(const clang::SanitizerSet &Sanitizers) {
@@ -1146,6 +1164,18 @@ static std::string toString(const clang::SanitizerSet &Sanitizers) {
return Res;
}
+static std::string toString(const clang::SanitizerMaskWeights &Weights) {
+ std::string Res;
+#define SANITIZER(NAME, ID) \
+ if (Weights[SanitizerKind::SO_##ID]) { \
+ if (!Res.empty()) \
+ Res += ","; \
+ Res += std::string(NAME) + "=" + std::to_string(Weights[SanitizerKind::SO_##ID]); \
+ }
+#include "clang/Basic/Sanitizers.def"
+ return Res;
+}
+
static void addSpecialCaseListOpt(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const char *SCLOptFlag,
@@ -1297,6 +1327,11 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
CmdArgs.push_back(
Args.MakeArgString("-fsanitize-merge=" + toString(MergeHandlers)));
+ std::string TopHotStr = toString(TopHot);
+ if (TopHotStr != "")
+ CmdArgs.push_back(
+ Args.MakeArgString("-fno-sanitize-top-hot=" + TopHotStr));
+
addSpecialCaseListOpt(Args, CmdArgs,
"-fsanitize-ignorelist=", UserIgnorelistFiles);
addSpecialCaseListOpt(Args, CmdArgs,
@@ -1463,7 +1498,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
}
SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
- bool DiagnoseErrors) {
+ bool DiagnoseErrors, SanitizerMaskWeights Weights) {
assert(
(A->getOption().matches(options::OPT_fsanitize_EQ) ||
A->getOption().matches(options::OPT_fno_sanitize_EQ) ||
@@ -1472,7 +1507,8 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
A->getOption().matches(options::OPT_fsanitize_trap_EQ) ||
A->getOption().matches(options::OPT_fno_sanitize_trap_EQ) ||
A->getOption().matches(options::OPT_fsanitize_merge_handlers_EQ) ||
- A->getOption().matches(options::OPT_fno_sanitize_merge_handlers_EQ)) &&
+ A->getOption().matches(options::OPT_fno_sanitize_merge_handlers_EQ) ||
+ A->getOption().matches(options::OPT_fno_sanitize_top_hot_EQ)) &&
"Invalid argument in parseArgValues!");
SanitizerMask Kinds;
for (int i = 0, n = A->getNumValues(); i != n; ++i) {
@@ -1482,8 +1518,13 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
if (A->getOption().matches(options::OPT_fsanitize_EQ) &&
0 == strcmp("all", Value))
Kind = SanitizerMask();
- else
+ else if (A->getOption().matches(options::OPT_fno_sanitize_top_hot_EQ)) {
+ assert(Weights && "Null weights parameter provided for parsing fno_sanitize_top_hot!");
+ Kind = parseSanitizerWeightedValue(Value, /*AllowGroups=*/true, Weights);
+ } else {
+ assert((!Weights) && "Non-null weights parameter erroneously provided!");
Kind = parseSanitizerValue(Value, /*AllowGroups=*/true);
+ }
if (Kind)
Kinds |= Kind;
@@ -1586,12 +1627,12 @@ std::string lastArgumentForMask(const Driver &D, const llvm::opt::ArgList &Args,
const auto *Arg = *I;
if (Arg->getOption().matches(options::OPT_fsanitize_EQ)) {
SanitizerMask AddKinds =
- expandSanitizerGroups(parseArgValues(D, Arg, false));
+ expandSanitizerGroups(parseArgValues(D, Arg, false, nullptr));
if (AddKinds & Mask)
return describeSanitizeArg(Arg, Mask);
} else if (Arg->getOption().matches(options::OPT_fno_sanitize_EQ)) {
SanitizerMask RemoveKinds =
- expandSanitizerGroups(parseArgValues(D, Arg, false));
+ expandSanitizerGroups(parseArgValues(D, Arg, false, nullptr));
Mask &= ~RemoveKinds;
}
}
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 348c56cc37da3f..c1c11f5a2325c7 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1796,6 +1796,11 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
serializeSanitizerKinds(Opts.SanitizeMergeHandlers))
GenerateArg(Consumer, OPT_fsanitize_merge_handlers_EQ, Sanitizer);
+ SmallVector<StringRef, 4> Values;
+ serializeSanitizerMaskWeights(Opts.NoSanitizeTopHot, Values);
+ for (StringRef Sanitizer : Values)
+ GenerateArg(Consumer, OPT_fno_sanitize_top_hot_EQ, Sanitizer);
+
if (!Opts.EmitVersionIdentMetadata)
GenerateArg(Consumer, OPT_Qn);
More information about the cfe-commits
mailing list