[clang] [llvm] [VFS] Guard against null key/value nodes when parsing YAML overlay (PR #190506)

Henry Jiang via cfe-commits cfe-commits at lists.llvm.org
Sun Apr 5 23:33:17 PDT 2026


https://github.com/mustartt updated https://github.com/llvm/llvm-project/pull/190506

>From 89ae54f54fbd942f30df84ca96d37f40fd50ba1c Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry_jiang2 at apple.com>
Date: Sat, 4 Apr 2026 23:32:06 -0700
Subject: [PATCH] Guard against parse error

---
 clang/lib/CrossTU/CrossTranslationUnit.cpp       |  6 ++++--
 clang/lib/Tooling/JSONCompilationDatabase.cpp    |  3 ++-
 clang/test/VFS/Inputs/invalid-key.yaml           |  9 +++++++++
 clang/test/VFS/Inputs/invalid-top-level-key.yaml |  5 +++++
 clang/test/VFS/parse-errors.c                    |  6 ++++++
 llvm/lib/Remarks/YAMLRemarkParser.cpp            | 13 +++++++------
 llvm/lib/Support/VirtualFileSystem.cpp           |  6 +++---
 llvm/lib/Support/YAMLTraits.cpp                  |  2 +-
 llvm/lib/Transforms/Utils/SymbolRewriter.cpp     | 16 ++++++++--------
 llvm/tools/sancov/sancov.cpp                     | 10 +++++-----
 10 files changed, 50 insertions(+), 26 deletions(-)
 create mode 100644 clang/test/VFS/Inputs/invalid-key.yaml
 create mode 100644 clang/test/VFS/Inputs/invalid-top-level-key.yaml

diff --git a/clang/lib/CrossTU/CrossTranslationUnit.cpp b/clang/lib/CrossTU/CrossTranslationUnit.cpp
index 8dd0ef13123d1..00b4fb504e9f6 100644
--- a/clang/lib/CrossTU/CrossTranslationUnit.cpp
+++ b/clang/lib/CrossTU/CrossTranslationUnit.cpp
@@ -679,7 +679,8 @@ parseInvocationList(StringRef FileContent, llvm::sys::path::Style PathStyle) {
 
   for (auto &NextMapping : *Mappings) {
     /// The keys should be strings, which represent a source-file path.
-    auto *Key = dyn_cast<llvm::yaml::ScalarNode>(NextMapping.getKey());
+    auto *Key =
+        dyn_cast_if_present<llvm::yaml::ScalarNode>(NextMapping.getKey());
     if (!Key)
       return llvm::make_error<IndexError>(
           index_error_code::invocation_list_wrong_format);
@@ -699,7 +700,8 @@ parseInvocationList(StringRef FileContent, llvm::sys::path::Style PathStyle) {
 
     /// The values should be sequences of strings, each representing a part of
     /// the invocation.
-    auto *Args = dyn_cast<llvm::yaml::SequenceNode>(NextMapping.getValue());
+    auto *Args =
+        dyn_cast_if_present<llvm::yaml::SequenceNode>(NextMapping.getValue());
     if (!Args)
       return llvm::make_error<IndexError>(
           index_error_code::invocation_list_wrong_format);
diff --git a/clang/lib/Tooling/JSONCompilationDatabase.cpp b/clang/lib/Tooling/JSONCompilationDatabase.cpp
index 31d39076749f8..0efa75970d986 100644
--- a/clang/lib/Tooling/JSONCompilationDatabase.cpp
+++ b/clang/lib/Tooling/JSONCompilationDatabase.cpp
@@ -347,7 +347,8 @@ bool JSONCompilationDatabase::parse(std::string &ErrorMessage) {
     llvm::yaml::ScalarNode *File = nullptr;
     llvm::yaml::ScalarNode *Output = nullptr;
     for (auto& NextKeyValue : *Object) {
-      auto *KeyString = dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey());
+      auto *KeyString =
+          dyn_cast_if_present<llvm::yaml::ScalarNode>(NextKeyValue.getKey());
       if (!KeyString) {
         ErrorMessage = "Expected strings as key.";
         return false;
diff --git a/clang/test/VFS/Inputs/invalid-key.yaml b/clang/test/VFS/Inputs/invalid-key.yaml
new file mode 100644
index 0000000000000..3d873d1a163b2
--- /dev/null
+++ b/clang/test/VFS/Inputs/invalid-key.yaml
@@ -0,0 +1,9 @@
+# NOTE: This file contains an intentional tab character that triggers a YAML
+# parse failure. Do not replace tabs with spaces.
+version: 0
+redirecting-with: fallthrough
+roots:
+  - type: directory-remap
+	name: test
+    external-contents: test
+
diff --git a/clang/test/VFS/Inputs/invalid-top-level-key.yaml b/clang/test/VFS/Inputs/invalid-top-level-key.yaml
new file mode 100644
index 0000000000000..dd2071a9f6739
--- /dev/null
+++ b/clang/test/VFS/Inputs/invalid-top-level-key.yaml
@@ -0,0 +1,5 @@
+# NOTE: This file contains an intentional tab character that triggers a YAML
+# parse failure. Do not replace tabs with spaces.
+version: 0
+	redirecting-with: fallthrough
+roots: []
diff --git a/clang/test/VFS/parse-errors.c b/clang/test/VFS/parse-errors.c
index 8aa208438bcca..88f59a4c61067 100644
--- a/clang/test/VFS/parse-errors.c
+++ b/clang/test/VFS/parse-errors.c
@@ -20,3 +20,9 @@
 // RUN: not %clang_cc1 -ivfsoverlay %S/Inputs/redirect-and-fallthrough.yaml -fsyntax-only %s 2>&1 | FileCheck -check-prefix=CHECK-EXCLUSIVE-KEYS %s
 // CHECK-EXCLUSIVE-KEYS: 'fallthrough' and 'redirecting-with' are mutually exclusive
 // CHECK-EXCLUSIVE-KEYS: invalid virtual filesystem overlay file
+
+// RUN: not %clang_cc1 -ivfsoverlay %S/Inputs/invalid-key.yaml -fsyntax-only %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-KEY %s
+// CHECK-INVALID-KEY: invalid virtual filesystem overlay file
+
+// RUN: not %clang_cc1 -ivfsoverlay %S/Inputs/invalid-top-level-key.yaml -fsyntax-only %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-TOP %s
+// CHECK-INVALID-TOP: invalid virtual filesystem overlay file
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp
index a8b1c0cc29ad6..33f659eaaa49b 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp
@@ -218,7 +218,8 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
       else
         return MaybeLoc.takeError();
     } else if (KeyName == "Args") {
-      auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
+      auto *Args =
+          dyn_cast_if_present<yaml::SequenceNode>(RemarkField.getValue());
       if (!Args)
         return error("wrong value type for key.", RemarkField);
 
@@ -257,19 +258,19 @@ Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
 }
 
 Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
-  if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
+  if (auto *Key = dyn_cast_if_present<yaml::ScalarNode>(Node.getKey()))
     return Key->getRawValue();
 
   return error("key is not a string.", Node);
 }
 
 Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
-  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+  auto *Value = dyn_cast_if_present<yaml::ScalarNode>(Node.getValue());
   yaml::BlockScalarNode *ValueBlock;
   StringRef Result;
   if (!Value) {
     // Try to parse the value as a block node.
-    ValueBlock = dyn_cast<yaml::BlockScalarNode>(Node.getValue());
+    ValueBlock = dyn_cast_if_present<yaml::BlockScalarNode>(Node.getValue());
     if (!ValueBlock)
       return error("expected a value of scalar type.", Node);
     Result = ValueBlock->getValue();
@@ -284,7 +285,7 @@ Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
 
 Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
   SmallVector<char, 4> Tmp;
-  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+  auto *Value = dyn_cast_if_present<yaml::ScalarNode>(Node.getValue());
   if (!Value)
     return error("expected a value of scalar type.", Node);
   unsigned UnsignedValue = 0;
@@ -295,7 +296,7 @@ Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
 
 Expected<RemarkLocation>
 YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
-  auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
+  auto *DebugLoc = dyn_cast_if_present<yaml::MappingNode>(Node.getValue());
   if (!DebugLoc)
     return error("expected a value of mapping type.", Node);
 
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 8a7ca35a8dd25..69f3bb8582b87 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -1668,7 +1668,7 @@ class llvm::vfs::RedirectingFileSystemParser {
   // false on error
   bool parseScalarString(yaml::Node *N, StringRef &Result,
                          SmallVectorImpl<char> &Storage) {
-    const auto *S = dyn_cast<yaml::ScalarNode>(N);
+    const auto *S = dyn_cast_if_present<yaml::ScalarNode>(N);
 
     if (!S) {
       error(N, "expected string");
@@ -1913,7 +1913,7 @@ class llvm::vfs::RedirectingFileSystemParser {
           return nullptr;
         }
         ContentsField = CF_List;
-        auto *Contents = dyn_cast<yaml::SequenceNode>(I.getValue());
+        auto *Contents = dyn_cast_if_present<yaml::SequenceNode>(I.getValue());
         if (!Contents) {
           // FIXME: this is only for directories, what about files?
           error(I.getValue(), "expected array");
@@ -2115,7 +2115,7 @@ class llvm::vfs::RedirectingFileSystemParser {
         return false;
 
       if (Key == "roots") {
-        auto *Roots = dyn_cast<yaml::SequenceNode>(I.getValue());
+        auto *Roots = dyn_cast_if_present<yaml::SequenceNode>(I.getValue());
         if (!Roots) {
           error(I.getValue(), "expected array");
           return false;
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 95a41eafdf5e4..d24e95e1e7487 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -430,7 +430,7 @@ Input::HNode *Input::createHNodes(Node *N) {
     auto mapHNode = new (MapHNodeAllocator.Allocate()) MapHNode(N);
     for (KeyValueNode &KVN : *Map) {
       Node *KeyNode = KVN.getKey();
-      ScalarNode *Key = dyn_cast_or_null<ScalarNode>(KeyNode);
+      ScalarNode *Key = dyn_cast_if_present<ScalarNode>(KeyNode);
       Node *Value = KVN.getValue();
       if (!Key || !Value) {
         if (!Key)
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index 6319fd524ff0f..4f361c3eb6720 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -295,13 +295,13 @@ bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
   SmallString<32> KeyStorage;
   StringRef RewriteType;
 
-  Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
+  Key = dyn_cast_if_present<yaml::ScalarNode>(Entry.getKey());
   if (!Key) {
     YS.printError(Entry.getKey(), "rewrite type must be a scalar");
     return false;
   }
 
-  Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
+  Value = dyn_cast_if_present<yaml::MappingNode>(Entry.getValue());
   if (!Value) {
     YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
     return false;
@@ -335,13 +335,13 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
     SmallString<32> ValueStorage;
     StringRef KeyValue;
 
-    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+    Key = dyn_cast_if_present<yaml::ScalarNode>(Field.getKey());
     if (!Key) {
       YS.printError(Field.getKey(), "descriptor key must be a scalar");
       return false;
     }
 
-    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+    Value = dyn_cast_if_present<yaml::ScalarNode>(Field.getValue());
     if (!Value) {
       YS.printError(Field.getValue(), "descriptor value must be a scalar");
       return false;
@@ -408,13 +408,13 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
     SmallString<32> ValueStorage;
     StringRef KeyValue;
 
-    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+    Key = dyn_cast_if_present<yaml::ScalarNode>(Field.getKey());
     if (!Key) {
       YS.printError(Field.getKey(), "descriptor Key must be a scalar");
       return false;
     }
 
-    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+    Value = dyn_cast_if_present<yaml::ScalarNode>(Field.getValue());
     if (!Value) {
       YS.printError(Field.getValue(), "descriptor value must be a scalar");
       return false;
@@ -475,13 +475,13 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
     SmallString<32> ValueStorage;
     StringRef KeyValue;
 
-    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+    Key = dyn_cast_if_present<yaml::ScalarNode>(Field.getKey());
     if (!Key) {
       YS.printError(Field.getKey(), "descriptor key must be a scalar");
       return false;
     }
 
-    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+    Value = dyn_cast_if_present<yaml::ScalarNode>(Field.getValue());
     if (!Value) {
       YS.printError(Field.getValue(), "descriptor value must be a scalar");
       return false;
diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp
index 931f87731681a..45caa5f8c9933 100644
--- a/llvm/tools/sancov/sancov.cpp
+++ b/llvm/tools/sancov/sancov.cpp
@@ -390,7 +390,7 @@ static void operator<<(json::OStream &W, const SymbolizedCoverage &C) {
 
 static std::string parseScalarString(yaml::Node *N) {
   SmallString<64> StringStorage;
-  yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(N);
+  yaml::ScalarNode *S = dyn_cast_if_present<yaml::ScalarNode>(N);
   failIf(!S, "expected string");
   return std::string(S->getValue(StringStorage));
 }
@@ -419,7 +419,7 @@ SymbolizedCoverage::read(const std::string &InputFile) {
 
     if (Key == "covered-points") {
       yaml::SequenceNode *Points =
-          dyn_cast<yaml::SequenceNode>(KVNode.getValue());
+          dyn_cast_if_present<yaml::SequenceNode>(KVNode.getValue());
       failIf(!Points, "expected array: " + InputFile);
 
       for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
@@ -429,21 +429,21 @@ SymbolizedCoverage::read(const std::string &InputFile) {
       Coverage->BinaryHash = parseScalarString(KVNode.getValue());
     } else if (Key == "point-symbol-info") {
       yaml::MappingNode *PointSymbolInfo =
-          dyn_cast<yaml::MappingNode>(KVNode.getValue());
+          dyn_cast_if_present<yaml::MappingNode>(KVNode.getValue());
       failIf(!PointSymbolInfo, "expected mapping node: " + InputFile);
 
       for (auto &FileKVNode : *PointSymbolInfo) {
         auto Filename = parseScalarString(FileKVNode.getKey());
 
         yaml::MappingNode *FileInfo =
-            dyn_cast<yaml::MappingNode>(FileKVNode.getValue());
+            dyn_cast_if_present<yaml::MappingNode>(FileKVNode.getValue());
         failIf(!FileInfo, "expected mapping node: " + InputFile);
 
         for (auto &FunctionKVNode : *FileInfo) {
           auto FunctionName = parseScalarString(FunctionKVNode.getKey());
 
           yaml::MappingNode *FunctionInfo =
-              dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue());
+              dyn_cast_if_present<yaml::MappingNode>(FunctionKVNode.getValue());
           failIf(!FunctionInfo, "expected mapping node: " + InputFile);
 
           for (auto &PointKVNode : *FunctionInfo) {



More information about the cfe-commits mailing list