[llvm] [Remarks] YAMLRemarkSerializer: Fix StringRef out-of-bounds read (PR #160397)

Jon Roelofs via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 23 14:10:58 PDT 2025


https://github.com/jroelofs created https://github.com/llvm/llvm-project/pull/160397

YAML IO `mapRequired` expected a null-terminated `const char * Key`, so we couldn't legally pass a `StringRef`` to it, as that might be length-terminated and not null-terminated.  In this patch, we move all of the YAML IO functions that accept a const char * over to `StringRef`.

Co-authored by: Tobias Stadler <tobias_stadler at apple.com>

Subsumes #159759

>From f2e6e81f53d8f3098816e007c5ffaef1117368cb Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Tue, 23 Sep 2025 14:05:40 -0700
Subject: [PATCH] [Remarks] YAMLRemarkSerializer: Fix StringRef out-of-bounds
 read

YAML IO `mapRequired` expected a null-terminated `const char * Key`, so
we couldn't legally pass a `StringRef`` to it, as that might be
length-terminated and not null-terminated.  In this patch, we move all of the
YAML IO functions that accept a const char * over to `StringRef`.

Co-authored by: Tobias Stadler <tobias_stadler at apple.com>

Subsumes #159759
---
 llvm/include/llvm/Support/YAMLTraits.h        | 52 +++++++++----------
 llvm/lib/Remarks/YAMLRemarkSerializer.cpp     |  4 +-
 llvm/lib/Support/YAMLTraits.cpp               | 14 ++---
 .../Remarks/YAMLRemarksSerializerTest.cpp     | 30 +++++++++++
 4 files changed, 65 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 07ac080d2c235..05dd8cef92d93 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -715,7 +715,7 @@ class LLVM_ABI IO {
   virtual bool mapTag(StringRef Tag, bool Default = false) = 0;
   virtual void beginMapping() = 0;
   virtual void endMapping() = 0;
-  virtual bool preflightKey(const char *, bool, bool, bool &, void *&) = 0;
+  virtual bool preflightKey(StringRef, bool, bool, bool &, void *&) = 0;
   virtual void postflightKey(void *) = 0;
   virtual std::vector<StringRef> keys() = 0;
 
@@ -723,12 +723,12 @@ class LLVM_ABI IO {
   virtual void endFlowMapping() = 0;
 
   virtual void beginEnumScalar() = 0;
-  virtual bool matchEnumScalar(const char *, bool) = 0;
+  virtual bool matchEnumScalar(StringRef, bool) = 0;
   virtual bool matchEnumFallback() = 0;
   virtual void endEnumScalar() = 0;
 
   virtual bool beginBitSetScalar(bool &) = 0;
-  virtual bool bitSetMatch(const char *, bool) = 0;
+  virtual bool bitSetMatch(StringRef, bool) = 0;
   virtual void endBitSetScalar() = 0;
 
   virtual void scalarString(StringRef &, QuotingType) = 0;
@@ -742,7 +742,7 @@ class LLVM_ABI IO {
   virtual void setAllowUnknownKeys(bool Allow);
 
   template <typename T>
-  void enumCase(T &Val, const char *Str, const T ConstVal) {
+  void enumCase(T &Val, StringRef Str, const T ConstVal) {
     if (matchEnumScalar(Str, outputting() && Val == ConstVal)) {
       Val = ConstVal;
     }
@@ -750,7 +750,7 @@ class LLVM_ABI IO {
 
   // allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
   template <typename T>
-  void enumCase(T &Val, const char *Str, const uint32_t ConstVal) {
+  void enumCase(T &Val, StringRef Str, const uint32_t ConstVal) {
     if (matchEnumScalar(Str, outputting() && Val == static_cast<T>(ConstVal))) {
       Val = ConstVal;
     }
@@ -767,7 +767,7 @@ class LLVM_ABI IO {
   }
 
   template <typename T>
-  void bitSetCase(T &Val, const char *Str, const T ConstVal) {
+  void bitSetCase(T &Val, StringRef Str, const T ConstVal) {
     if (bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal)) {
       Val = static_cast<T>(Val | ConstVal);
     }
@@ -775,20 +775,20 @@ class LLVM_ABI IO {
 
   // allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
   template <typename T>
-  void bitSetCase(T &Val, const char *Str, const uint32_t ConstVal) {
+  void bitSetCase(T &Val, StringRef Str, const uint32_t ConstVal) {
     if (bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal)) {
       Val = static_cast<T>(Val | ConstVal);
     }
   }
 
   template <typename T>
-  void maskedBitSetCase(T &Val, const char *Str, T ConstVal, T Mask) {
+  void maskedBitSetCase(T &Val, StringRef Str, T ConstVal, T Mask) {
     if (bitSetMatch(Str, outputting() && (Val & Mask) == ConstVal))
       Val = Val | ConstVal;
   }
 
   template <typename T>
-  void maskedBitSetCase(T &Val, const char *Str, uint32_t ConstVal,
+  void maskedBitSetCase(T &Val, StringRef Str, uint32_t ConstVal,
                         uint32_t Mask) {
     if (bitSetMatch(Str, outputting() && (Val & Mask) == ConstVal))
       Val = Val | ConstVal;
@@ -797,29 +797,29 @@ class LLVM_ABI IO {
   void *getContext() const;
   void setContext(void *);
 
-  template <typename T> void mapRequired(const char *Key, T &Val) {
+  template <typename T> void mapRequired(StringRef Key, T &Val) {
     EmptyContext Ctx;
     this->processKey(Key, Val, true, Ctx);
   }
 
   template <typename T, typename Context>
-  void mapRequired(const char *Key, T &Val, Context &Ctx) {
+  void mapRequired(StringRef Key, T &Val, Context &Ctx) {
     this->processKey(Key, Val, true, Ctx);
   }
 
-  template <typename T> void mapOptional(const char *Key, T &Val) {
+  template <typename T> void mapOptional(StringRef Key, T &Val) {
     EmptyContext Ctx;
     mapOptionalWithContext(Key, Val, Ctx);
   }
 
   template <typename T, typename DefaultT>
-  void mapOptional(const char *Key, T &Val, const DefaultT &Default) {
+  void mapOptional(StringRef Key, T &Val, const DefaultT &Default) {
     EmptyContext Ctx;
     mapOptionalWithContext(Key, Val, Default, Ctx);
   }
 
   template <typename T, typename Context>
-  void mapOptionalWithContext(const char *Key, T &Val, Context &Ctx) {
+  void mapOptionalWithContext(StringRef Key, T &Val, Context &Ctx) {
     if constexpr (has_SequenceTraits<T>::value) {
       // omit key/value instead of outputting empty sequence
       if (this->canElideEmptySequence() && Val.begin() == Val.end())
@@ -829,14 +829,14 @@ class LLVM_ABI IO {
   }
 
   template <typename T, typename Context>
-  void mapOptionalWithContext(const char *Key, std::optional<T> &Val,
+  void mapOptionalWithContext(StringRef Key, std::optional<T> &Val,
                               Context &Ctx) {
     this->processKeyWithDefault(Key, Val, std::optional<T>(),
                                 /*Required=*/false, Ctx);
   }
 
   template <typename T, typename Context, typename DefaultT>
-  void mapOptionalWithContext(const char *Key, T &Val, const DefaultT &Default,
+  void mapOptionalWithContext(StringRef Key, T &Val, const DefaultT &Default,
                               Context &Ctx) {
     static_assert(std::is_convertible<DefaultT, T>::value,
                   "Default type must be implicitly convertible to value type!");
@@ -846,12 +846,12 @@ class LLVM_ABI IO {
 
 private:
   template <typename T, typename Context>
-  void processKeyWithDefault(const char *Key, std::optional<T> &Val,
+  void processKeyWithDefault(StringRef Key, std::optional<T> &Val,
                              const std::optional<T> &DefaultValue,
                              bool Required, Context &Ctx);
 
   template <typename T, typename Context>
-  void processKeyWithDefault(const char *Key, T &Val, const T &DefaultValue,
+  void processKeyWithDefault(StringRef Key, T &Val, const T &DefaultValue,
                              bool Required, Context &Ctx) {
     void *SaveInfo;
     bool UseDefault;
@@ -867,7 +867,7 @@ class LLVM_ABI IO {
   }
 
   template <typename T, typename Context>
-  void processKey(const char *Key, T &Val, bool Required, Context &Ctx) {
+  void processKey(StringRef Key, T &Val, bool Required, Context &Ctx) {
     void *SaveInfo;
     bool UseDefault;
     if (this->preflightKey(Key, Required, false, UseDefault, SaveInfo)) {
@@ -1342,7 +1342,7 @@ class LLVM_ABI Input : public IO {
   bool mapTag(StringRef, bool) override;
   void beginMapping() override;
   void endMapping() override;
-  bool preflightKey(const char *, bool, bool, bool &, void *&) override;
+  bool preflightKey(StringRef Key, bool, bool, bool &, void *&) override;
   void postflightKey(void *) override;
   std::vector<StringRef> keys() override;
   void beginFlowMapping() override;
@@ -1356,11 +1356,11 @@ class LLVM_ABI Input : public IO {
   void postflightFlowElement(void *) override;
   void endFlowSequence() override;
   void beginEnumScalar() override;
-  bool matchEnumScalar(const char *, bool) override;
+  bool matchEnumScalar(StringRef, bool) override;
   bool matchEnumFallback() override;
   void endEnumScalar() override;
   bool beginBitSetScalar(bool &) override;
-  bool bitSetMatch(const char *, bool) override;
+  bool bitSetMatch(StringRef, bool) override;
   void endBitSetScalar() override;
   void scalarString(StringRef &, QuotingType) override;
   void blockScalarString(StringRef &) override;
@@ -1493,7 +1493,7 @@ class LLVM_ABI Output : public IO {
   bool mapTag(StringRef, bool) override;
   void beginMapping() override;
   void endMapping() override;
-  bool preflightKey(const char *key, bool, bool, bool &, void *&) override;
+  bool preflightKey(StringRef Key, bool, bool, bool &, void *&) override;
   void postflightKey(void *) override;
   std::vector<StringRef> keys() override;
   void beginFlowMapping() override;
@@ -1507,11 +1507,11 @@ class LLVM_ABI Output : public IO {
   void postflightFlowElement(void *) override;
   void endFlowSequence() override;
   void beginEnumScalar() override;
-  bool matchEnumScalar(const char *, bool) override;
+  bool matchEnumScalar(StringRef, bool) override;
   bool matchEnumFallback() override;
   void endEnumScalar() override;
   bool beginBitSetScalar(bool &) override;
-  bool bitSetMatch(const char *, bool) override;
+  bool bitSetMatch(StringRef, bool) override;
   void endBitSetScalar() override;
   void scalarString(StringRef &, QuotingType) override;
   void blockScalarString(StringRef &) override;
@@ -1568,7 +1568,7 @@ class LLVM_ABI Output : public IO {
 };
 
 template <typename T, typename Context>
-void IO::processKeyWithDefault(const char *Key, std::optional<T> &Val,
+void IO::processKeyWithDefault(StringRef Key, std::optional<T> &Val,
                                const std::optional<T> &DefaultValue,
                                bool Required, Context &Ctx) {
   assert(!DefaultValue && "std::optional<T> shouldn't have a value!");
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
index 846a72182d8f0..a25a575925137 100644
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -118,9 +118,9 @@ template <> struct MappingTraits<Argument> {
 
     if (StringRef(A.Val).count('\n') > 1) {
       StringBlockVal S(A.Val);
-      io.mapRequired(A.Key.data(), S);
+      io.mapRequired(A.Key, S);
     } else {
-      io.mapRequired(A.Key.data(), A.Val);
+      io.mapRequired(A.Key, A.Val);
     }
     io.mapOptional("DebugLoc", A.Loc);
   }
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 035828b594e84..95a41eafdf5e4 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -144,7 +144,7 @@ std::vector<StringRef> Input::keys() {
   return Ret;
 }
 
-bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
+bool Input::preflightKey(StringRef Key, bool Required, bool, bool &UseDefault,
                          void *&SaveInfo) {
   UseDefault = false;
   if (EC)
@@ -168,7 +168,7 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
       UseDefault = true;
     return false;
   }
-  MN->ValidKeys.push_back(Key);
+  MN->ValidKeys.push_back(Key.str());
   HNode *Value = MN->Mapping[Key].first;
   if (!Value) {
     if (Required)
@@ -266,7 +266,7 @@ void Input::beginEnumScalar() {
   ScalarMatchFound = false;
 }
 
-bool Input::matchEnumScalar(const char *Str, bool) {
+bool Input::matchEnumScalar(StringRef Str, bool) {
   if (ScalarMatchFound)
     return false;
   if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
@@ -302,7 +302,7 @@ bool Input::beginBitSetScalar(bool &DoClear) {
   return true;
 }
 
-bool Input::bitSetMatch(const char *Str, bool) {
+bool Input::bitSetMatch(StringRef Str, bool) {
   if (EC)
     return false;
   if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
@@ -541,7 +541,7 @@ std::vector<StringRef> Output::keys() {
   report_fatal_error("invalid call");
 }
 
-bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
+bool Output::preflightKey(StringRef Key, bool Required, bool SameAsDefault,
                           bool &UseDefault, void *&SaveInfo) {
   UseDefault = false;
   SaveInfo = nullptr;
@@ -666,7 +666,7 @@ void Output::beginEnumScalar() {
   EnumerationMatchFound = false;
 }
 
-bool Output::matchEnumScalar(const char *Str, bool Match) {
+bool Output::matchEnumScalar(StringRef Str, bool Match) {
   if (Match && !EnumerationMatchFound) {
     newLineCheck();
     outputUpToEndOfLine(Str);
@@ -695,7 +695,7 @@ bool Output::beginBitSetScalar(bool &DoClear) {
   return true;
 }
 
-bool Output::bitSetMatch(const char *Str, bool Matches) {
+bool Output::bitSetMatch(StringRef Str, bool Matches) {
   if (Matches) {
     if (NeedBitValueComma)
       output(", ");
diff --git a/llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp b/llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp
index 7e994ac4d58bc..6431cbb6aa00a 100644
--- a/llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp
+++ b/llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp
@@ -165,3 +165,33 @@ TEST(YAMLRemarks, SerializerRemarkParsedStrTabStandaloneNoStrTab) {
                 "...\n"),
       std::move(PreFilledStrTab));
 }
+
+TEST(YAMLRemarks, SerializerRemarkStringRefOOBRead) {
+  remarks::Remark R;
+  R.RemarkType = remarks::Type::Missed;
+  R.PassName = StringRef("passAAAA", 4);
+  R.RemarkName = StringRef("nameAAAA", 4);
+  R.FunctionName = StringRef("funcAAAA", 4);
+  R.Loc = remarks::RemarkLocation{StringRef("pathAAAA", 4), 3, 4};
+  R.Hotness = 5;
+  R.Args.emplace_back();
+  R.Args.back().Key = StringRef("keyAAAA", 3);
+  R.Args.back().Val = StringRef("valueAAAA", 5);
+  R.Args.emplace_back();
+  R.Args.back().Key = StringRef("keydebugAAAA", 8);
+  R.Args.back().Val = StringRef("valuedebugAAAA", 10);
+  R.Args.back().Loc =
+      remarks::RemarkLocation{StringRef("argpathAAAA", 7), 6, 7};
+  checkStandalone(remarks::Format::YAML, R,
+                  "--- !Missed\n"
+                  "Pass:            pass\n"
+                  "Name:            name\n"
+                  "DebugLoc:        { File: path, Line: 3, Column: 4 }\n"
+                  "Function:        func\n"
+                  "Hotness:         5\n"
+                  "Args:\n"
+                  "  - key:             value\n"
+                  "  - keydebug:        valuedebug\n"
+                  "    DebugLoc:        { File: argpath, Line: 6, Column: 7 }\n"
+                  "...\n");
+}
\ No newline at end of file



More information about the llvm-commits mailing list