[clang] [clang] Bounds checking on unclosed parentheses, brackets or braces in Expanded Tokens (PR #69849)

via cfe-commits cfe-commits at lists.llvm.org
Sat Oct 21 08:16:26 PDT 2023


https://github.com/robozati created https://github.com/llvm/llvm-project/pull/69849

When getting the AST from clangd, if there is an unclosed token, a segfault or an UB can happen when accessing `File.SpelledTokens` because the supposed closing token is not present, so its index is greater than the vec's size.

This fixes the issue by returning a `nullptr` on the function that caused the UB and then checking if the spelled token is null, returning `nullopt` for the whole expanded token.

Fixes https://github.com/clangd/clangd/issues/1559.

>From e57657cf296e1e93fa17f021bb8ce591071b2176 Mon Sep 17 00:00:00 2001
From: robozati <139823421+robozati at users.noreply.github.com>
Date: Sat, 21 Oct 2023 12:04:14 -0300
Subject: [PATCH] Fix UB on unclosed parentheses, brackets or braces in Tokens

---
 clang/lib/Tooling/Syntax/Tokens.cpp           | 23 ++++++++++++++-----
 clang/unittests/Tooling/Syntax/TokensTest.cpp |  6 +++++
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index 2f28b9cf286a638..e4c24aa1ef3fbf0 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -286,9 +286,13 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
   });
   // Our token could only be produced by the previous mapping.
   if (It == File.Mappings.begin()) {
-    // No previous mapping, no need to modify offsets.
-    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
-            /*Mapping=*/nullptr};
+    const auto offset = ExpandedIndex - File.BeginExpanded;
+    // Bounds check so parsing an unclosed parenthesis, brackets or braces do
+    // not result in UB.
+    if (offset >= File.SpelledTokens.size()) {
+      return {nullptr, nullptr};
+    }
+    return {&File.SpelledTokens[offset], /*Mapping=*/nullptr};
   }
   --It; // 'It' now points to last mapping that started before our token.
 
@@ -298,9 +302,12 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
 
   // Not part of the mapping, use the index from previous mapping to compute the
   // corresponding spelled token.
-  return {
-      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
-      /*Mapping=*/nullptr};
+  const auto offset = It->EndSpelled + (ExpandedIndex - It->EndExpanded);
+  // This index can also result in UB when parsing unclosed tokens.
+  if (offset >= File.SpelledTokens.size()) {
+    return {nullptr, nullptr};
+  }
+  return {&File.SpelledTokens[offset], /*Mapping=*/nullptr};
 }
 
 const TokenBuffer::Mapping *
@@ -410,6 +417,10 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
   auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First);
   auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last);
 
+  if (FirstSpelled == nullptr || LastSpelled == nullptr) {
+    return std::nullopt;
+  };
+
   FileID FID = SourceMgr->getFileID(FirstSpelled->location());
   // FIXME: Handle multi-file changes by trying to map onto a common root.
   if (FID != SourceMgr->getFileID(LastSpelled->location()))
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index 0c08318a637c0b6..44f0c57b3bad875 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -814,6 +814,12 @@ TEST_F(TokenBufferTest, SpelledByExpanded) {
   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
               ValueIs(SameRange(findSpelled("good"))));
   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good")), std::nullopt);
+
+  // Unclosed parentheses, brackets or braces do not result in a UB
+  recordTokens(R"cpp(
+    foo(x
+  )cpp");
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("x")), std::nullopt);
 }
 
 TEST_F(TokenBufferTest, ExpandedTokensForRange) {



More information about the cfe-commits mailing list