[clang] [clang] Bounds checking on unclosed parentheses, brackets or braces in Expanded Tokens (PR #69849)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Oct 21 15:27:05 PDT 2023
https://github.com/robozati updated https://github.com/llvm/llvm-project/pull/69849
>From 239302d66dea74333e27288acbceffdf3f2d3b6b Mon Sep 17 00:00:00 2001
From: Gabriel Pezati <gabrielpezati at gmail.com>
Date: Sat, 21 Oct 2023 19:28:57 -0300
Subject: [PATCH] Fix UB on unclosed parentheses, brackets or braces in Tokens
---
clang/lib/Tooling/Syntax/Tokens.cpp | 25 ++++++++++++++-----
clang/unittests/Tooling/Syntax/TokensTest.cpp | 11 ++++++++
2 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index 2f28b9cf286a638..97f794e09cb5590 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -286,9 +286,13 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
});
// Our token could only be produced by the previous mapping.
if (It == File.Mappings.begin()) {
- // No previous mapping, no need to modify offsets.
- return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
- /*Mapping=*/nullptr};
+ const auto offset = ExpandedIndex - File.BeginExpanded;
+ // Bounds check so parsing an unclosed parenthesis, brackets or braces do
+ // not result in UB.
+ if (offset >= File.SpelledTokens.size()) {
+ return {nullptr, nullptr};
+ }
+ return {&File.SpelledTokens[offset], /*Mapping=*/nullptr};
}
--It; // 'It' now points to last mapping that started before our token.
@@ -298,9 +302,12 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
// Not part of the mapping, use the index from previous mapping to compute the
// corresponding spelled token.
- return {
- &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
- /*Mapping=*/nullptr};
+ const auto offset = It->EndSpelled + (ExpandedIndex - It->EndExpanded);
+ // This index can also result in UB when parsing unclosed tokens.
+ if (offset >= File.SpelledTokens.size()) {
+ return {nullptr, nullptr};
+ }
+ return {&File.SpelledTokens[offset], /*Mapping=*/nullptr};
}
const TokenBuffer::Mapping *
@@ -410,6 +417,12 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First);
auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last);
+ // If there was an unclosed token, FirstSpelled or LastSpelled is null and the
+ // operation shouldn't continue.
+ if (FirstSpelled == nullptr || LastSpelled == nullptr) {
+ return std::nullopt;
+ };
+
FileID FID = SourceMgr->getFileID(FirstSpelled->location());
// FIXME: Handle multi-file changes by trying to map onto a common root.
if (FID != SourceMgr->getFileID(LastSpelled->location()))
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index 0c08318a637c0b6..e1b053ce45b841b 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -585,6 +585,17 @@ TEST_F(TokenCollectorTest, DelayedParsing) {
EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
}
+TEST_F(TokenCollectorTest, UnclosedToken) {
+ llvm::StringLiteral Code = R"cpp(
+ int main() {
+ // this should not result in a segfault or UB.
+ )cpp";
+ std::string ExpectedTokens =
+ "expanded tokens:\n int main ( ) {\nfile './input.cpp'\n spelled "
+ "tokens:\n int main ( ) {\n no mappings.\n";
+ EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
+}
+
TEST_F(TokenCollectorTest, MultiFile) {
addFile("./foo.h", R"cpp(
#define ADD(X, Y) X+Y
More information about the cfe-commits
mailing list