[clang] [clang] Bounds checking on unclosed parentheses, brackets or braces in Expanded Tokens (PR #69849)

via cfe-commits cfe-commits at lists.llvm.org
Sat Oct 21 15:27:05 PDT 2023


https://github.com/robozati updated https://github.com/llvm/llvm-project/pull/69849

>From 239302d66dea74333e27288acbceffdf3f2d3b6b Mon Sep 17 00:00:00 2001
From: Gabriel Pezati <gabrielpezati at gmail.com>
Date: Sat, 21 Oct 2023 19:28:57 -0300
Subject: [PATCH] Fix UB on unclosed parentheses, brackets or braces in Tokens

---
 clang/lib/Tooling/Syntax/Tokens.cpp           | 25 ++++++++++++++-----
 clang/unittests/Tooling/Syntax/TokensTest.cpp | 11 ++++++++
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index 2f28b9cf286a638..97f794e09cb5590 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -286,9 +286,13 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
   });
   // Our token could only be produced by the previous mapping.
   if (It == File.Mappings.begin()) {
-    // No previous mapping, no need to modify offsets.
-    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
-            /*Mapping=*/nullptr};
+    const auto offset = ExpandedIndex - File.BeginExpanded;
+    // Bounds check so parsing an unclosed parenthesis, brackets or braces do
+    // not result in UB.
+    if (offset >= File.SpelledTokens.size()) {
+      return {nullptr, nullptr};
+    }
+    return {&File.SpelledTokens[offset], /*Mapping=*/nullptr};
   }
   --It; // 'It' now points to last mapping that started before our token.
 
@@ -298,9 +302,12 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
 
   // Not part of the mapping, use the index from previous mapping to compute the
   // corresponding spelled token.
-  return {
-      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
-      /*Mapping=*/nullptr};
+  const auto offset = It->EndSpelled + (ExpandedIndex - It->EndExpanded);
+  // This index can also result in UB when parsing unclosed tokens.
+  if (offset >= File.SpelledTokens.size()) {
+    return {nullptr, nullptr};
+  }
+  return {&File.SpelledTokens[offset], /*Mapping=*/nullptr};
 }
 
 const TokenBuffer::Mapping *
@@ -410,6 +417,12 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
   auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First);
   auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last);
 
+  // If there was an unclosed token, FirstSpelled or LastSpelled is null and the
+  // operation shouldn't continue.
+  if (FirstSpelled == nullptr || LastSpelled == nullptr) {
+    return std::nullopt;
+  };
+
   FileID FID = SourceMgr->getFileID(FirstSpelled->location());
   // FIXME: Handle multi-file changes by trying to map onto a common root.
   if (FID != SourceMgr->getFileID(LastSpelled->location()))
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index 0c08318a637c0b6..e1b053ce45b841b 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -585,6 +585,17 @@ TEST_F(TokenCollectorTest, DelayedParsing) {
   EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
 }
 
+TEST_F(TokenCollectorTest, UnclosedToken) {
+  llvm::StringLiteral Code = R"cpp(
+    int main() {
+    // this should not result in a segfault or UB.
+  )cpp";
+  std::string ExpectedTokens =
+      "expanded tokens:\n  int main ( ) {\nfile './input.cpp'\n  spelled "
+      "tokens:\n    int main ( ) {\n  no mappings.\n";
+  EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
+}
+
 TEST_F(TokenCollectorTest, MultiFile) {
   addFile("./foo.h", R"cpp(
     #define ADD(X, Y) X+Y



More information about the cfe-commits mailing list