[llvm] [Remarks] YAMLRemarkSerializer: Fix StringRef out-of-bounds read (PR #160397)
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 23 16:49:06 PDT 2025
https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/160397
>From 94952af304e35d74c9fed83755eaeff9480133b8 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Tue, 23 Sep 2025 14:05:40 -0700
Subject: [PATCH] [Remarks] YAMLRemarkSerializer: StringRef-ize apis to avoid
out-of-bounds read footguns
In #159759, Tobias identified that because YAML IO `mapRequired` expected a
null-terminated `const char * Key`, we couldn't legally pass a `StringRef`` to
it, as that might be length-terminated and not null-terminated. In this patch,
we move all of the YAML IO functions that accept a const char * over to
`StringRef`, avoiding that footgun altogether.
---
llvm/include/llvm/Support/YAMLTraits.h | 53 +++++++++++------------
llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 10 ++---
llvm/lib/Support/YAMLTraits.cpp | 14 +++---
3 files changed, 37 insertions(+), 40 deletions(-)
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 81e3e2e41e86d..3d36f41ca1a04 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -705,7 +705,7 @@ class LLVM_ABI IO {
virtual bool mapTag(StringRef Tag, bool Default = false) = 0;
virtual void beginMapping() = 0;
virtual void endMapping() = 0;
- virtual bool preflightKey(const char *, bool, bool, bool &, void *&) = 0;
+ virtual bool preflightKey(StringRef, bool, bool, bool &, void *&) = 0;
virtual void postflightKey(void *) = 0;
virtual std::vector<StringRef> keys() = 0;
@@ -713,12 +713,12 @@ class LLVM_ABI IO {
virtual void endFlowMapping() = 0;
virtual void beginEnumScalar() = 0;
- virtual bool matchEnumScalar(const char *, bool) = 0;
+ virtual bool matchEnumScalar(StringRef, bool) = 0;
virtual bool matchEnumFallback() = 0;
virtual void endEnumScalar() = 0;
virtual bool beginBitSetScalar(bool &) = 0;
- virtual bool bitSetMatch(const char *, bool) = 0;
+ virtual bool bitSetMatch(StringRef, bool) = 0;
virtual void endBitSetScalar() = 0;
virtual void scalarString(StringRef &, QuotingType) = 0;
@@ -731,8 +731,7 @@ class LLVM_ABI IO {
virtual std::error_code error() = 0;
virtual void setAllowUnknownKeys(bool Allow);
- template <typename T>
- void enumCase(T &Val, const char *Str, const T ConstVal) {
+ template <typename T> void enumCase(T &Val, StringRef Str, const T ConstVal) {
if (matchEnumScalar(Str, outputting() && Val == ConstVal)) {
Val = ConstVal;
}
@@ -740,7 +739,7 @@ class LLVM_ABI IO {
// allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
template <typename T>
- void enumCase(T &Val, const char *Str, const uint32_t ConstVal) {
+ void enumCase(T &Val, StringRef Str, const uint32_t ConstVal) {
if (matchEnumScalar(Str, outputting() && Val == static_cast<T>(ConstVal))) {
Val = ConstVal;
}
@@ -757,7 +756,7 @@ class LLVM_ABI IO {
}
template <typename T>
- void bitSetCase(T &Val, const char *Str, const T ConstVal) {
+ void bitSetCase(T &Val, StringRef Str, const T ConstVal) {
if (bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal)) {
Val = static_cast<T>(Val | ConstVal);
}
@@ -765,20 +764,20 @@ class LLVM_ABI IO {
// allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
template <typename T>
- void bitSetCase(T &Val, const char *Str, const uint32_t ConstVal) {
+ void bitSetCase(T &Val, StringRef Str, const uint32_t ConstVal) {
if (bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal)) {
Val = static_cast<T>(Val | ConstVal);
}
}
template <typename T>
- void maskedBitSetCase(T &Val, const char *Str, T ConstVal, T Mask) {
+ void maskedBitSetCase(T &Val, StringRef Str, T ConstVal, T Mask) {
if (bitSetMatch(Str, outputting() && (Val & Mask) == ConstVal))
Val = Val | ConstVal;
}
template <typename T>
- void maskedBitSetCase(T &Val, const char *Str, uint32_t ConstVal,
+ void maskedBitSetCase(T &Val, StringRef Str, uint32_t ConstVal,
uint32_t Mask) {
if (bitSetMatch(Str, outputting() && (Val & Mask) == ConstVal))
Val = Val | ConstVal;
@@ -787,29 +786,29 @@ class LLVM_ABI IO {
void *getContext() const;
void setContext(void *);
- template <typename T> void mapRequired(const char *Key, T &Val) {
+ template <typename T> void mapRequired(StringRef Key, T &Val) {
EmptyContext Ctx;
this->processKey(Key, Val, true, Ctx);
}
template <typename T, typename Context>
- void mapRequired(const char *Key, T &Val, Context &Ctx) {
+ void mapRequired(StringRef Key, T &Val, Context &Ctx) {
this->processKey(Key, Val, true, Ctx);
}
- template <typename T> void mapOptional(const char *Key, T &Val) {
+ template <typename T> void mapOptional(StringRef Key, T &Val) {
EmptyContext Ctx;
mapOptionalWithContext(Key, Val, Ctx);
}
template <typename T, typename DefaultT>
- void mapOptional(const char *Key, T &Val, const DefaultT &Default) {
+ void mapOptional(StringRef Key, T &Val, const DefaultT &Default) {
EmptyContext Ctx;
mapOptionalWithContext(Key, Val, Default, Ctx);
}
template <typename T, typename Context>
- void mapOptionalWithContext(const char *Key, T &Val, Context &Ctx) {
+ void mapOptionalWithContext(StringRef Key, T &Val, Context &Ctx) {
if constexpr (has_SequenceTraits<T>::value) {
// omit key/value instead of outputting empty sequence
if (this->canElideEmptySequence() && Val.begin() == Val.end())
@@ -819,14 +818,14 @@ class LLVM_ABI IO {
}
template <typename T, typename Context>
- void mapOptionalWithContext(const char *Key, std::optional<T> &Val,
+ void mapOptionalWithContext(StringRef Key, std::optional<T> &Val,
Context &Ctx) {
this->processKeyWithDefault(Key, Val, std::optional<T>(),
/*Required=*/false, Ctx);
}
template <typename T, typename Context, typename DefaultT>
- void mapOptionalWithContext(const char *Key, T &Val, const DefaultT &Default,
+ void mapOptionalWithContext(StringRef Key, T &Val, const DefaultT &Default,
Context &Ctx) {
static_assert(std::is_convertible<DefaultT, T>::value,
"Default type must be implicitly convertible to value type!");
@@ -836,12 +835,12 @@ class LLVM_ABI IO {
private:
template <typename T, typename Context>
- void processKeyWithDefault(const char *Key, std::optional<T> &Val,
+ void processKeyWithDefault(StringRef Key, std::optional<T> &Val,
const std::optional<T> &DefaultValue,
bool Required, Context &Ctx);
template <typename T, typename Context>
- void processKeyWithDefault(const char *Key, T &Val, const T &DefaultValue,
+ void processKeyWithDefault(StringRef Key, T &Val, const T &DefaultValue,
bool Required, Context &Ctx) {
void *SaveInfo;
bool UseDefault;
@@ -857,7 +856,7 @@ class LLVM_ABI IO {
}
template <typename T, typename Context>
- void processKey(const char *Key, T &Val, bool Required, Context &Ctx) {
+ void processKey(StringRef Key, T &Val, bool Required, Context &Ctx) {
void *SaveInfo;
bool UseDefault;
if (this->preflightKey(Key, Required, false, UseDefault, SaveInfo)) {
@@ -1332,7 +1331,7 @@ class LLVM_ABI Input : public IO {
bool mapTag(StringRef, bool) override;
void beginMapping() override;
void endMapping() override;
- bool preflightKey(const char *, bool, bool, bool &, void *&) override;
+ bool preflightKey(StringRef Key, bool, bool, bool &, void *&) override;
void postflightKey(void *) override;
std::vector<StringRef> keys() override;
void beginFlowMapping() override;
@@ -1346,11 +1345,11 @@ class LLVM_ABI Input : public IO {
void postflightFlowElement(void *) override;
void endFlowSequence() override;
void beginEnumScalar() override;
- bool matchEnumScalar(const char *, bool) override;
+ bool matchEnumScalar(StringRef, bool) override;
bool matchEnumFallback() override;
void endEnumScalar() override;
bool beginBitSetScalar(bool &) override;
- bool bitSetMatch(const char *, bool) override;
+ bool bitSetMatch(StringRef, bool) override;
void endBitSetScalar() override;
void scalarString(StringRef &, QuotingType) override;
void blockScalarString(StringRef &) override;
@@ -1483,7 +1482,7 @@ class LLVM_ABI Output : public IO {
bool mapTag(StringRef, bool) override;
void beginMapping() override;
void endMapping() override;
- bool preflightKey(const char *key, bool, bool, bool &, void *&) override;
+ bool preflightKey(StringRef Key, bool, bool, bool &, void *&) override;
void postflightKey(void *) override;
std::vector<StringRef> keys() override;
void beginFlowMapping() override;
@@ -1497,11 +1496,11 @@ class LLVM_ABI Output : public IO {
void postflightFlowElement(void *) override;
void endFlowSequence() override;
void beginEnumScalar() override;
- bool matchEnumScalar(const char *, bool) override;
+ bool matchEnumScalar(StringRef, bool) override;
bool matchEnumFallback() override;
void endEnumScalar() override;
bool beginBitSetScalar(bool &) override;
- bool bitSetMatch(const char *, bool) override;
+ bool bitSetMatch(StringRef, bool) override;
void endBitSetScalar() override;
void scalarString(StringRef &, QuotingType) override;
void blockScalarString(StringRef &) override;
@@ -1558,7 +1557,7 @@ class LLVM_ABI Output : public IO {
};
template <typename T, typename Context>
-void IO::processKeyWithDefault(const char *Key, std::optional<T> &Val,
+void IO::processKeyWithDefault(StringRef Key, std::optional<T> &Val,
const std::optional<T> &DefaultValue,
bool Required, Context &Ctx) {
assert(!DefaultValue && "std::optional<T> shouldn't have a value!");
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
index 42ebfbfb3729a..22e297040575c 100644
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -114,15 +114,13 @@ template <> struct MappingTraits<Argument> {
static void mapping(IO &io, Argument &A) {
assert(io.outputting() && "input not yet implemented");
- // A.Key.data() is not necessarily null-terminated, so we must make a copy,
- // otherwise we potentially read out of bounds.
- // FIXME: Add support for StringRef Keys in YAML IO.
- std::string Key(A.Key);
+ // NB: A.Key.data() is not necessarily null-terminated, as the StringRef may
+ // be a span into the middle of a string.
if (StringRef(A.Val).count('\n') > 1) {
StringBlockVal S(A.Val);
- io.mapRequired(Key.c_str(), S);
+ io.mapRequired(A.Key, S);
} else {
- io.mapRequired(Key.c_str(), A.Val);
+ io.mapRequired(A.Key, A.Val);
}
io.mapOptional("DebugLoc", A.Loc);
}
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 035828b594e84..95a41eafdf5e4 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -144,7 +144,7 @@ std::vector<StringRef> Input::keys() {
return Ret;
}
-bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
+bool Input::preflightKey(StringRef Key, bool Required, bool, bool &UseDefault,
void *&SaveInfo) {
UseDefault = false;
if (EC)
@@ -168,7 +168,7 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
UseDefault = true;
return false;
}
- MN->ValidKeys.push_back(Key);
+ MN->ValidKeys.push_back(Key.str());
HNode *Value = MN->Mapping[Key].first;
if (!Value) {
if (Required)
@@ -266,7 +266,7 @@ void Input::beginEnumScalar() {
ScalarMatchFound = false;
}
-bool Input::matchEnumScalar(const char *Str, bool) {
+bool Input::matchEnumScalar(StringRef Str, bool) {
if (ScalarMatchFound)
return false;
if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
@@ -302,7 +302,7 @@ bool Input::beginBitSetScalar(bool &DoClear) {
return true;
}
-bool Input::bitSetMatch(const char *Str, bool) {
+bool Input::bitSetMatch(StringRef Str, bool) {
if (EC)
return false;
if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
@@ -541,7 +541,7 @@ std::vector<StringRef> Output::keys() {
report_fatal_error("invalid call");
}
-bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
+bool Output::preflightKey(StringRef Key, bool Required, bool SameAsDefault,
bool &UseDefault, void *&SaveInfo) {
UseDefault = false;
SaveInfo = nullptr;
@@ -666,7 +666,7 @@ void Output::beginEnumScalar() {
EnumerationMatchFound = false;
}
-bool Output::matchEnumScalar(const char *Str, bool Match) {
+bool Output::matchEnumScalar(StringRef Str, bool Match) {
if (Match && !EnumerationMatchFound) {
newLineCheck();
outputUpToEndOfLine(Str);
@@ -695,7 +695,7 @@ bool Output::beginBitSetScalar(bool &DoClear) {
return true;
}
-bool Output::bitSetMatch(const char *Str, bool Matches) {
+bool Output::bitSetMatch(StringRef Str, bool Matches) {
if (Matches) {
if (NeedBitValueComma)
output(", ");
More information about the llvm-commits
mailing list