[llvm] [Remarks] YAMLRemarkSerializer: Fix StringRef out-of-bounds read (PR #160397)

Jon Roelofs via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 23 16:49:06 PDT 2025


https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/160397

>From 94952af304e35d74c9fed83755eaeff9480133b8 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Tue, 23 Sep 2025 14:05:40 -0700
Subject: [PATCH] [Remarks] YAMLRemarkSerializer: StringRef-ize apis to avoid
 out-of-bounds read footguns

In #159759, Tobias identified that because YAML IO `mapRequired` expected a
null-terminated `const char * Key`, we couldn't legally pass a `StringRef`` to
it, as that might be length-terminated and not null-terminated.  In this patch,
we move all of the YAML IO functions that accept a const char * over to
`StringRef`, avoiding that footgun altogether.
---
 llvm/include/llvm/Support/YAMLTraits.h    | 53 +++++++++++------------
 llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 10 ++---
 llvm/lib/Support/YAMLTraits.cpp           | 14 +++---
 3 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 81e3e2e41e86d..3d36f41ca1a04 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -705,7 +705,7 @@ class LLVM_ABI IO {
   virtual bool mapTag(StringRef Tag, bool Default = false) = 0;
   virtual void beginMapping() = 0;
   virtual void endMapping() = 0;
-  virtual bool preflightKey(const char *, bool, bool, bool &, void *&) = 0;
+  virtual bool preflightKey(StringRef, bool, bool, bool &, void *&) = 0;
   virtual void postflightKey(void *) = 0;
   virtual std::vector<StringRef> keys() = 0;
 
@@ -713,12 +713,12 @@ class LLVM_ABI IO {
   virtual void endFlowMapping() = 0;
 
   virtual void beginEnumScalar() = 0;
-  virtual bool matchEnumScalar(const char *, bool) = 0;
+  virtual bool matchEnumScalar(StringRef, bool) = 0;
   virtual bool matchEnumFallback() = 0;
   virtual void endEnumScalar() = 0;
 
   virtual bool beginBitSetScalar(bool &) = 0;
-  virtual bool bitSetMatch(const char *, bool) = 0;
+  virtual bool bitSetMatch(StringRef, bool) = 0;
   virtual void endBitSetScalar() = 0;
 
   virtual void scalarString(StringRef &, QuotingType) = 0;
@@ -731,8 +731,7 @@ class LLVM_ABI IO {
   virtual std::error_code error() = 0;
   virtual void setAllowUnknownKeys(bool Allow);
 
-  template <typename T>
-  void enumCase(T &Val, const char *Str, const T ConstVal) {
+  template <typename T> void enumCase(T &Val, StringRef Str, const T ConstVal) {
     if (matchEnumScalar(Str, outputting() && Val == ConstVal)) {
       Val = ConstVal;
     }
@@ -740,7 +739,7 @@ class LLVM_ABI IO {
 
   // allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
   template <typename T>
-  void enumCase(T &Val, const char *Str, const uint32_t ConstVal) {
+  void enumCase(T &Val, StringRef Str, const uint32_t ConstVal) {
     if (matchEnumScalar(Str, outputting() && Val == static_cast<T>(ConstVal))) {
       Val = ConstVal;
     }
@@ -757,7 +756,7 @@ class LLVM_ABI IO {
   }
 
   template <typename T>
-  void bitSetCase(T &Val, const char *Str, const T ConstVal) {
+  void bitSetCase(T &Val, StringRef Str, const T ConstVal) {
     if (bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal)) {
       Val = static_cast<T>(Val | ConstVal);
     }
@@ -765,20 +764,20 @@ class LLVM_ABI IO {
 
   // allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
   template <typename T>
-  void bitSetCase(T &Val, const char *Str, const uint32_t ConstVal) {
+  void bitSetCase(T &Val, StringRef Str, const uint32_t ConstVal) {
     if (bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal)) {
       Val = static_cast<T>(Val | ConstVal);
     }
   }
 
   template <typename T>
-  void maskedBitSetCase(T &Val, const char *Str, T ConstVal, T Mask) {
+  void maskedBitSetCase(T &Val, StringRef Str, T ConstVal, T Mask) {
     if (bitSetMatch(Str, outputting() && (Val & Mask) == ConstVal))
       Val = Val | ConstVal;
   }
 
   template <typename T>
-  void maskedBitSetCase(T &Val, const char *Str, uint32_t ConstVal,
+  void maskedBitSetCase(T &Val, StringRef Str, uint32_t ConstVal,
                         uint32_t Mask) {
     if (bitSetMatch(Str, outputting() && (Val & Mask) == ConstVal))
       Val = Val | ConstVal;
@@ -787,29 +786,29 @@ class LLVM_ABI IO {
   void *getContext() const;
   void setContext(void *);
 
-  template <typename T> void mapRequired(const char *Key, T &Val) {
+  template <typename T> void mapRequired(StringRef Key, T &Val) {
     EmptyContext Ctx;
     this->processKey(Key, Val, true, Ctx);
   }
 
   template <typename T, typename Context>
-  void mapRequired(const char *Key, T &Val, Context &Ctx) {
+  void mapRequired(StringRef Key, T &Val, Context &Ctx) {
     this->processKey(Key, Val, true, Ctx);
   }
 
-  template <typename T> void mapOptional(const char *Key, T &Val) {
+  template <typename T> void mapOptional(StringRef Key, T &Val) {
     EmptyContext Ctx;
     mapOptionalWithContext(Key, Val, Ctx);
   }
 
   template <typename T, typename DefaultT>
-  void mapOptional(const char *Key, T &Val, const DefaultT &Default) {
+  void mapOptional(StringRef Key, T &Val, const DefaultT &Default) {
     EmptyContext Ctx;
     mapOptionalWithContext(Key, Val, Default, Ctx);
   }
 
   template <typename T, typename Context>
-  void mapOptionalWithContext(const char *Key, T &Val, Context &Ctx) {
+  void mapOptionalWithContext(StringRef Key, T &Val, Context &Ctx) {
     if constexpr (has_SequenceTraits<T>::value) {
       // omit key/value instead of outputting empty sequence
       if (this->canElideEmptySequence() && Val.begin() == Val.end())
@@ -819,14 +818,14 @@ class LLVM_ABI IO {
   }
 
   template <typename T, typename Context>
-  void mapOptionalWithContext(const char *Key, std::optional<T> &Val,
+  void mapOptionalWithContext(StringRef Key, std::optional<T> &Val,
                               Context &Ctx) {
     this->processKeyWithDefault(Key, Val, std::optional<T>(),
                                 /*Required=*/false, Ctx);
   }
 
   template <typename T, typename Context, typename DefaultT>
-  void mapOptionalWithContext(const char *Key, T &Val, const DefaultT &Default,
+  void mapOptionalWithContext(StringRef Key, T &Val, const DefaultT &Default,
                               Context &Ctx) {
     static_assert(std::is_convertible<DefaultT, T>::value,
                   "Default type must be implicitly convertible to value type!");
@@ -836,12 +835,12 @@ class LLVM_ABI IO {
 
 private:
   template <typename T, typename Context>
-  void processKeyWithDefault(const char *Key, std::optional<T> &Val,
+  void processKeyWithDefault(StringRef Key, std::optional<T> &Val,
                              const std::optional<T> &DefaultValue,
                              bool Required, Context &Ctx);
 
   template <typename T, typename Context>
-  void processKeyWithDefault(const char *Key, T &Val, const T &DefaultValue,
+  void processKeyWithDefault(StringRef Key, T &Val, const T &DefaultValue,
                              bool Required, Context &Ctx) {
     void *SaveInfo;
     bool UseDefault;
@@ -857,7 +856,7 @@ class LLVM_ABI IO {
   }
 
   template <typename T, typename Context>
-  void processKey(const char *Key, T &Val, bool Required, Context &Ctx) {
+  void processKey(StringRef Key, T &Val, bool Required, Context &Ctx) {
     void *SaveInfo;
     bool UseDefault;
     if (this->preflightKey(Key, Required, false, UseDefault, SaveInfo)) {
@@ -1332,7 +1331,7 @@ class LLVM_ABI Input : public IO {
   bool mapTag(StringRef, bool) override;
   void beginMapping() override;
   void endMapping() override;
-  bool preflightKey(const char *, bool, bool, bool &, void *&) override;
+  bool preflightKey(StringRef Key, bool, bool, bool &, void *&) override;
   void postflightKey(void *) override;
   std::vector<StringRef> keys() override;
   void beginFlowMapping() override;
@@ -1346,11 +1345,11 @@ class LLVM_ABI Input : public IO {
   void postflightFlowElement(void *) override;
   void endFlowSequence() override;
   void beginEnumScalar() override;
-  bool matchEnumScalar(const char *, bool) override;
+  bool matchEnumScalar(StringRef, bool) override;
   bool matchEnumFallback() override;
   void endEnumScalar() override;
   bool beginBitSetScalar(bool &) override;
-  bool bitSetMatch(const char *, bool) override;
+  bool bitSetMatch(StringRef, bool) override;
   void endBitSetScalar() override;
   void scalarString(StringRef &, QuotingType) override;
   void blockScalarString(StringRef &) override;
@@ -1483,7 +1482,7 @@ class LLVM_ABI Output : public IO {
   bool mapTag(StringRef, bool) override;
   void beginMapping() override;
   void endMapping() override;
-  bool preflightKey(const char *key, bool, bool, bool &, void *&) override;
+  bool preflightKey(StringRef Key, bool, bool, bool &, void *&) override;
   void postflightKey(void *) override;
   std::vector<StringRef> keys() override;
   void beginFlowMapping() override;
@@ -1497,11 +1496,11 @@ class LLVM_ABI Output : public IO {
   void postflightFlowElement(void *) override;
   void endFlowSequence() override;
   void beginEnumScalar() override;
-  bool matchEnumScalar(const char *, bool) override;
+  bool matchEnumScalar(StringRef, bool) override;
   bool matchEnumFallback() override;
   void endEnumScalar() override;
   bool beginBitSetScalar(bool &) override;
-  bool bitSetMatch(const char *, bool) override;
+  bool bitSetMatch(StringRef, bool) override;
   void endBitSetScalar() override;
   void scalarString(StringRef &, QuotingType) override;
   void blockScalarString(StringRef &) override;
@@ -1558,7 +1557,7 @@ class LLVM_ABI Output : public IO {
 };
 
 template <typename T, typename Context>
-void IO::processKeyWithDefault(const char *Key, std::optional<T> &Val,
+void IO::processKeyWithDefault(StringRef Key, std::optional<T> &Val,
                                const std::optional<T> &DefaultValue,
                                bool Required, Context &Ctx) {
   assert(!DefaultValue && "std::optional<T> shouldn't have a value!");
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
index 42ebfbfb3729a..22e297040575c 100644
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -114,15 +114,13 @@ template <> struct MappingTraits<Argument> {
   static void mapping(IO &io, Argument &A) {
     assert(io.outputting() && "input not yet implemented");
 
-    // A.Key.data() is not necessarily null-terminated, so we must make a copy,
-    // otherwise we potentially read out of bounds.
-    // FIXME: Add support for StringRef Keys in YAML IO.
-    std::string Key(A.Key);
+    // NB: A.Key.data() is not necessarily null-terminated, as the StringRef may
+    // be a span into the middle of a string.
     if (StringRef(A.Val).count('\n') > 1) {
       StringBlockVal S(A.Val);
-      io.mapRequired(Key.c_str(), S);
+      io.mapRequired(A.Key, S);
     } else {
-      io.mapRequired(Key.c_str(), A.Val);
+      io.mapRequired(A.Key, A.Val);
     }
     io.mapOptional("DebugLoc", A.Loc);
   }
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 035828b594e84..95a41eafdf5e4 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -144,7 +144,7 @@ std::vector<StringRef> Input::keys() {
   return Ret;
 }
 
-bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
+bool Input::preflightKey(StringRef Key, bool Required, bool, bool &UseDefault,
                          void *&SaveInfo) {
   UseDefault = false;
   if (EC)
@@ -168,7 +168,7 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
       UseDefault = true;
     return false;
   }
-  MN->ValidKeys.push_back(Key);
+  MN->ValidKeys.push_back(Key.str());
   HNode *Value = MN->Mapping[Key].first;
   if (!Value) {
     if (Required)
@@ -266,7 +266,7 @@ void Input::beginEnumScalar() {
   ScalarMatchFound = false;
 }
 
-bool Input::matchEnumScalar(const char *Str, bool) {
+bool Input::matchEnumScalar(StringRef Str, bool) {
   if (ScalarMatchFound)
     return false;
   if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
@@ -302,7 +302,7 @@ bool Input::beginBitSetScalar(bool &DoClear) {
   return true;
 }
 
-bool Input::bitSetMatch(const char *Str, bool) {
+bool Input::bitSetMatch(StringRef Str, bool) {
   if (EC)
     return false;
   if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
@@ -541,7 +541,7 @@ std::vector<StringRef> Output::keys() {
   report_fatal_error("invalid call");
 }
 
-bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
+bool Output::preflightKey(StringRef Key, bool Required, bool SameAsDefault,
                           bool &UseDefault, void *&SaveInfo) {
   UseDefault = false;
   SaveInfo = nullptr;
@@ -666,7 +666,7 @@ void Output::beginEnumScalar() {
   EnumerationMatchFound = false;
 }
 
-bool Output::matchEnumScalar(const char *Str, bool Match) {
+bool Output::matchEnumScalar(StringRef Str, bool Match) {
   if (Match && !EnumerationMatchFound) {
     newLineCheck();
     outputUpToEndOfLine(Str);
@@ -695,7 +695,7 @@ bool Output::beginBitSetScalar(bool &DoClear) {
   return true;
 }
 
-bool Output::bitSetMatch(const char *Str, bool Matches) {
+bool Output::bitSetMatch(StringRef Str, bool Matches) {
   if (Matches) {
     if (NeedBitValueComma)
       output(", ");



More information about the llvm-commits mailing list