[clang-tools-extra] [clang-tidy] [NFC] Move comment scanning to `LexerUtils` and add tests (PR #180371)
Daniil Dudkin via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 12 13:01:28 PST 2026
https://github.com/unterumarmung updated https://github.com/llvm/llvm-project/pull/180371
>From 3d45791cd4724de984e74ab2d44fde2b6f0a293a Mon Sep 17 00:00:00 2001
From: Daniil Dudkin <unterumarmung at yandex.ru>
Date: Sun, 8 Feb 2026 00:15:19 +0300
Subject: [PATCH] [clang-tidy] [NFC] Move comment scanning to `LexerUtils` and
add tests
---
.../bugprone/ArgumentCommentCheck.cpp | 74 ++-------
.../clang-tidy/utils/LexerUtils.cpp | 61 ++++++++
.../clang-tidy/utils/LexerUtils.h | 13 ++
.../unittests/clang-tidy/CMakeLists.txt | 1 +
.../unittests/clang-tidy/LexerUtilsTest.cpp | 144 ++++++++++++++++++
5 files changed, 235 insertions(+), 58 deletions(-)
create mode 100644 clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp
index d46896808bd09..eb9b710b28549 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp
@@ -17,6 +17,8 @@
using namespace clang::ast_matchers;
namespace clang::tidy::bugprone {
+
+using utils::lexer::CommentToken;
namespace {
AST_MATCHER(Decl, isFromStdNamespaceOrSystemHeader) {
if (const auto *D = Node.getDeclContext()->getEnclosingNamespaceContext())
@@ -77,55 +79,9 @@ void ArgumentCommentCheck::registerMatchers(MatchFinder *Finder) {
this);
}
-static std::vector<std::pair<SourceLocation, StringRef>>
-getCommentsInRange(ASTContext *Ctx, CharSourceRange Range) {
- std::vector<std::pair<SourceLocation, StringRef>> Comments;
- auto &SM = Ctx->getSourceManager();
- const std::pair<FileID, unsigned> BeginLoc =
- SM.getDecomposedLoc(Range.getBegin()),
- EndLoc =
- SM.getDecomposedLoc(Range.getEnd());
-
- if (BeginLoc.first != EndLoc.first)
- return Comments;
-
- bool Invalid = false;
- const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid);
- if (Invalid)
- return Comments;
-
- const char *StrData = Buffer.data() + BeginLoc.second;
-
- Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), Ctx->getLangOpts(),
- Buffer.begin(), StrData, Buffer.end());
- TheLexer.SetCommentRetentionState(true);
-
- while (true) {
- Token Tok;
- if (TheLexer.LexFromRawLexer(Tok))
- break;
- if (Tok.getLocation() == Range.getEnd() || Tok.is(tok::eof))
- break;
-
- if (Tok.is(tok::comment)) {
- const std::pair<FileID, unsigned> CommentLoc =
- SM.getDecomposedLoc(Tok.getLocation());
- assert(CommentLoc.first == BeginLoc.first);
- Comments.emplace_back(
- Tok.getLocation(),
- StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength()));
- } else {
- // Clear comments found before the different token, e.g. comma.
- Comments.clear();
- }
- }
-
- return Comments;
-}
-
-static std::vector<std::pair<SourceLocation, StringRef>>
-getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) {
- std::vector<std::pair<SourceLocation, StringRef>> Comments;
+static std::vector<CommentToken> getCommentsBeforeLoc(ASTContext *Ctx,
+ SourceLocation Loc) {
+ std::vector<CommentToken> Comments;
while (Loc.isValid()) {
const std::optional<Token> Tok = utils::lexer::getPreviousToken(
Loc, Ctx->getSourceManager(), Ctx->getLangOpts(),
@@ -133,11 +89,12 @@ getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) {
if (!Tok || Tok->isNot(tok::comment))
break;
Loc = Tok->getLocation();
- Comments.emplace_back(
+ Comments.emplace_back(CommentToken{
Loc,
Lexer::getSourceText(CharSourceRange::getCharRange(
Loc, Loc.getLocWithOffset(Tok->getLength())),
- Ctx->getSourceManager(), Ctx->getLangOpts()));
+ Ctx->getSourceManager(), Ctx->getLangOpts()),
+ });
}
return Comments;
}
@@ -304,9 +261,10 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx,
MakeFileCharRange(ArgBeginLoc, Args[I]->getBeginLoc());
ArgBeginLoc = Args[I]->getEndLoc();
- std::vector<std::pair<SourceLocation, StringRef>> Comments;
+ std::vector<CommentToken> Comments;
if (BeforeArgument.isValid()) {
- Comments = getCommentsInRange(Ctx, BeforeArgument);
+ Comments = utils::lexer::getTrailingCommentsInRange(
+ BeforeArgument, Ctx->getSourceManager(), Ctx->getLangOpts());
} else {
// Fall back to parsing back from the start of the argument.
const CharSourceRange ArgsRange =
@@ -314,18 +272,18 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx,
Comments = getCommentsBeforeLoc(Ctx, ArgsRange.getBegin());
}
- for (auto Comment : Comments) {
+ for (const auto &Comment : Comments) {
llvm::SmallVector<StringRef, 2> Matches;
- if (IdentRE.match(Comment.second, &Matches) &&
+ if (IdentRE.match(Comment.Text, &Matches) &&
!sameName(Matches[2], II->getName(), StrictMode)) {
{
const DiagnosticBuilder Diag =
- diag(Comment.first, "argument name '%0' in comment does not "
- "match parameter name %1")
+ diag(Comment.Loc, "argument name '%0' in comment does not "
+ "match parameter name %1")
<< Matches[2] << II;
if (isLikelyTypo(Callee->parameters(), Matches[2], II->getName())) {
Diag << FixItHint::CreateReplacement(
- Comment.first, (Matches[1] + II->getName() + Matches[3]).str());
+ Comment.Loc, (Matches[1] + II->getName() + Matches[3]).str());
}
}
diag(PVD->getLocation(), "%0 declared here", DiagnosticIDs::Note) << II;
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index b77d985b76d77..b7de34f7e2e00 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -10,6 +10,7 @@
#include "clang/Basic/SourceManager.h"
#include <optional>
#include <utility>
+#include <vector>
namespace clang::tidy::utils::lexer {
@@ -99,6 +100,66 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range,
return false;
}
+std::vector<CommentToken>
+getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ std::vector<CommentToken> Comments;
+ if (Range.isInvalid())
+ return Comments;
+
+ const CharSourceRange FileRange =
+ Lexer::makeFileCharRange(Range, SM, LangOpts);
+ if (FileRange.isInvalid())
+ return Comments;
+
+ const std::pair<FileID, unsigned> BeginLoc =
+ SM.getDecomposedLoc(FileRange.getBegin());
+ const std::pair<FileID, unsigned> EndLoc =
+ SM.getDecomposedLoc(FileRange.getEnd());
+
+ if (BeginLoc.first != EndLoc.first)
+ return Comments;
+
+ bool Invalid = false;
+ const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid);
+ if (Invalid)
+ return Comments;
+
+ const char *StrData = Buffer.data() + BeginLoc.second;
+
+ Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), LangOpts,
+ Buffer.begin(), StrData, Buffer.end());
+ // Use raw lexing with comment retention so we can see comment tokens without
+ // preprocessing or macro expansion effects.
+ TheLexer.SetCommentRetentionState(true);
+
+ while (true) {
+ Token Tok;
+ if (TheLexer.LexFromRawLexer(Tok))
+ break;
+ if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() ||
+ SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation()))
+ break;
+
+ if (Tok.is(tok::comment)) {
+ const std::pair<FileID, unsigned> CommentLoc =
+ SM.getDecomposedLoc(Tok.getLocation());
+ assert(CommentLoc.first == BeginLoc.first);
+ Comments.emplace_back(CommentToken{
+ Tok.getLocation(),
+ StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength()),
+ });
+ } else {
+ // Clear comments found before the different token, e.g. comma. Callers
+ // use this to retrieve only the contiguous comment block that directly
+ // precedes a token of interest.
+ Comments.clear();
+ }
+ }
+
+ return Comments;
+}
+
std::optional<Token> getQualifyingToken(tok::TokenKind TK,
CharSourceRange Range,
const ASTContext &Context,
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
index 61389d86f22f4..38123ae14cff7 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
@@ -14,6 +14,7 @@
#include "clang/Lex/Lexer.h"
#include <optional>
#include <utility>
+#include <vector>
namespace clang {
@@ -113,6 +114,18 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range,
const SourceManager &SM,
const LangOptions &LangOpts);
+// Represents a comment token and its source location in the original file.
+struct CommentToken {
+ SourceLocation Loc;
+ StringRef Text;
+};
+
+/// Returns comment tokens found in the given range. If a non-comment token is
+/// encountered, clears previously collected comments and continues.
+std::vector<CommentToken>
+getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
+ const LangOptions &LangOpts);
+
/// Assuming that ``Range`` spans a CVR-qualified type, returns the
/// token in ``Range`` that is responsible for the qualification. ``Range``
/// must be valid with respect to ``SM``. Returns ``std::nullopt`` if no
diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
index 64bf47e61736c..167f5d3def06b 100644
--- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
@@ -24,6 +24,7 @@ add_extra_unittest(ClangTidyTests
DeclRefExprUtilsTest.cpp
IncludeCleanerTest.cpp
IncludeInserterTest.cpp
+ LexerUtilsTest.cpp
GlobListTest.cpp
GoogleModuleTest.cpp
LLVMModuleTest.cpp
diff --git a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
new file mode 100644
index 0000000000000..438a78b4694ee
--- /dev/null
+++ b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
@@ -0,0 +1,144 @@
+//===--- LexerUtilsTest.cpp - clang-tidy ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../clang-tidy/utils/LexerUtils.h"
+
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/ASTUnit.h"
+#include "clang/Serialization/PCHContainerOperations.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Testing/Annotations/Annotations.h"
+#include "gtest/gtest.h"
+
+namespace clang::tidy::test {
+
+using clang::tooling::FileContentMappings;
+
+static std::unique_ptr<ASTUnit>
+buildAST(StringRef Code, const FileContentMappings &Mappings = {}) {
+ std::vector<std::string> Args = {"-std=c++20"};
+ return clang::tooling::buildASTFromCodeWithArgs(
+ Code, Args, "input.cc", "clang-tool",
+ std::make_shared<PCHContainerOperations>(),
+ clang::tooling::getClangStripDependencyFileAdjuster(), Mappings);
+}
+
+static CharSourceRange rangeFromAnnotations(const llvm::Annotations &A,
+ const SourceManager &SM, FileID FID,
+ llvm::StringRef Name = "") {
+ const auto R = A.range(Name);
+ const SourceLocation Begin =
+ SM.getLocForStartOfFile(FID).getLocWithOffset(R.Begin);
+ const SourceLocation End =
+ SM.getLocForStartOfFile(FID).getLocWithOffset(R.End);
+ return CharSourceRange::getCharRange(Begin, End);
+}
+
+namespace {
+
+TEST(LexerUtilsTest, GetTrailingCommentsInRangeAdjacentComments) {
+ llvm::Annotations Code(R"cpp(
+void f() {
+ $range[[/*first*/ /*second*/]]
+ int x = 0;
+}
+)cpp");
+ std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+ ASSERT_TRUE(AST);
+ const ASTContext &Context = AST->getASTContext();
+ const SourceManager &SM = Context.getSourceManager();
+ const LangOptions &LangOpts = Context.getLangOpts();
+
+ const CharSourceRange Range =
+ rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+ const std::vector<utils::lexer::CommentToken> Comments =
+ utils::lexer::getTrailingCommentsInRange(Range, SM, LangOpts);
+ ASSERT_EQ(2u, Comments.size());
+ EXPECT_EQ("/*first*/", Comments[0].Text);
+ EXPECT_EQ("/*second*/", Comments[1].Text);
+ const StringRef CodeText = Code.code();
+ const size_t FirstOffset = CodeText.find("/*first*/");
+ ASSERT_NE(StringRef::npos, FirstOffset);
+ const size_t SecondOffset = CodeText.find("/*second*/");
+ ASSERT_NE(StringRef::npos, SecondOffset);
+ EXPECT_EQ(FirstOffset, SM.getFileOffset(Comments[0].Loc));
+ EXPECT_EQ(SecondOffset, SM.getFileOffset(Comments[1].Loc));
+}
+
+TEST(LexerUtilsTest, GetTrailingCommentsInRangeClearsOnToken) {
+ llvm::Annotations Code(R"cpp(
+void f() {
+ int x = ($range[[/*first*/ 0, /*second*/]] 1);
+}
+)cpp");
+ std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+ ASSERT_TRUE(AST);
+ const ASTContext &Context = AST->getASTContext();
+ const SourceManager &SM = Context.getSourceManager();
+ const LangOptions &LangOpts = Context.getLangOpts();
+
+ const CharSourceRange Range =
+ rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+ const std::vector<utils::lexer::CommentToken> Comments =
+ utils::lexer::getTrailingCommentsInRange(Range, SM, LangOpts);
+ ASSERT_EQ(1u, Comments.size());
+ EXPECT_EQ("/*second*/", Comments.front().Text);
+ const StringRef CodeText = Code.code();
+ const size_t SecondOffset = CodeText.find("/*second*/");
+ ASSERT_NE(StringRef::npos, SecondOffset);
+ EXPECT_EQ(SecondOffset, SM.getFileOffset(Comments.front().Loc));
+}
+
+TEST(LexerUtilsTest, GetTrailingCommentsInRangeLineComments) {
+ llvm::Annotations Code(R"cpp(
+void f() {
+ $range[[// first
+ // second
+ ]]
+ int x = 0;
+}
+)cpp");
+ std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+ ASSERT_TRUE(AST);
+ const ASTContext &Context = AST->getASTContext();
+ const SourceManager &SM = Context.getSourceManager();
+ const LangOptions &LangOpts = Context.getLangOpts();
+
+ const CharSourceRange Range =
+ rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+ const std::vector<utils::lexer::CommentToken> Comments =
+ utils::lexer::getTrailingCommentsInRange(Range, SM, LangOpts);
+ ASSERT_EQ(2u, Comments.size());
+ EXPECT_EQ("// first", Comments[0].Text);
+ EXPECT_EQ("// second", Comments[1].Text);
+ const StringRef CodeText = Code.code();
+ const size_t FirstOffset = CodeText.find("// first");
+ ASSERT_NE(StringRef::npos, FirstOffset);
+ const size_t SecondOffset = CodeText.find("// second");
+ ASSERT_NE(StringRef::npos, SecondOffset);
+ EXPECT_EQ(FirstOffset, SM.getFileOffset(Comments[0].Loc));
+ EXPECT_EQ(SecondOffset, SM.getFileOffset(Comments[1].Loc));
+}
+
+TEST(LexerUtilsTest, GetTrailingCommentsInRangeInvalidRange) {
+ std::unique_ptr<ASTUnit> AST = buildAST("int value = 0;");
+ ASSERT_TRUE(AST);
+ const ASTContext &Context = AST->getASTContext();
+ const SourceManager &SM = Context.getSourceManager();
+ const LangOptions &LangOpts = Context.getLangOpts();
+
+ const std::vector<utils::lexer::CommentToken> Comments =
+ utils::lexer::getTrailingCommentsInRange(CharSourceRange(), SM, LangOpts);
+ EXPECT_TRUE(Comments.empty());
+}
+
+} // namespace
+
+} // namespace clang::tidy::test
More information about the cfe-commits
mailing list