[clang] [clang-tools-extra] [clang-reorder-fields] Reorder leading comments (PR #123740)
Clement Courbet via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 21 04:12:36 PST 2025
https://github.com/legrosbuffle created https://github.com/llvm/llvm-project/pull/123740
Similarly to https://github.com/llvm/llvm-project/pull/122918, leading
comments are currently not being moved.
```
struct Foo {
// This one is the cool field.
int a;
int b;
};
```
becomes:
```
struct Foo {
// This one is the cool field.
int b;
int a;
};
```
but should be:
```
struct Foo {
int b;
// This one is the cool field.
int a;
};
```
>From 4146793e3a239663495f86ecf9df9245195c5cca Mon Sep 17 00:00:00 2001
From: Clement Courbet <courbet at google.com>
Date: Thu, 16 Jan 2025 07:34:13 +0000
Subject: [PATCH 1/2] [clang][refactor] Move utilities from
`clang_tidy::LexerUtils` to `clang::Lexer`
To make them more widely available and reusable.
Add unit tests.
---
.../clang-tidy/utils/LexerUtils.cpp | 25 +++++--------
clang/include/clang/Lex/Lexer.h | 6 ++++
clang/lib/Lex/Lexer.cpp | 21 +++++++++++
clang/unittests/Lex/LexerTest.cpp | 35 +++++++++++++++++++
4 files changed, 70 insertions(+), 17 deletions(-)
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index 50da196315d3b3..c14d341caf7794 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -17,25 +17,16 @@ namespace clang::tidy::utils::lexer {
std::pair<Token, SourceLocation>
getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
const LangOptions &LangOpts, bool SkipComments) {
- Token Token;
- Token.setKind(tok::unknown);
+ const std::optional<Token> Tok =
+ Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
- Location = Location.getLocWithOffset(-1);
- if (Location.isInvalid())
- return {Token, Location};
-
- const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
- while (Location != StartOfFile) {
- Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
- if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
- (!SkipComments || !Token.is(tok::comment))) {
- break;
- }
- if (Location == StartOfFile)
- return {Token, Location};
- Location = Location.getLocWithOffset(-1);
+ if (Tok.has_value()) {
+ return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
}
- return {Token, Location};
+
+ Token Token;
+ Token.setKind(tok::unknown);
+ return {Token, SourceLocation()};
}
Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 82a041ea3f848a..89c8ae354dafc1 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -557,6 +557,12 @@ class Lexer : public PreprocessorLexer {
const LangOptions &LangOpts,
bool IncludeComments = false);
+ /// Finds the token that comes before the given location.
+ static std::optional<Token> findPreviousToken(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ bool IncludeComments);
+
/// Checks that the given token is the first token that occurs after
/// the given location (this excludes comments and whitespace). Returns the
/// location immediately after the specified token. If the token is not found
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 115b6c1606a022..087c6f13aea66a 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1352,6 +1352,27 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
return Tok;
}
+std::optional<Token> Lexer::findPreviousToken(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ bool IncludeComments) {
+ const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Loc));
+ while (Loc != StartOfFile) {
+ Loc = Loc.getLocWithOffset(-1);
+ if (Loc.isInvalid())
+ return std::nullopt;
+
+ Loc = GetBeginningOfToken(Loc, SM, LangOpts);
+ Token Tok;
+ if (getRawToken(Loc, Tok, SM, LangOpts))
+ continue; // Not a token, go to prev location.
+ if (!Tok.is(tok::comment) || IncludeComments) {
+ return Tok;
+ }
+ }
+ return std::nullopt;
+}
+
/// Checks that the given token is the first token that occurs after the
/// given location (this excludes comments and whitespace). Returns the location
/// immediately after the specified token. If the token is not found or the
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index c897998cabe666..29c61fee6f5315 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -640,6 +640,41 @@ TEST_F(LexerTest, FindNextTokenIncludingComments) {
"=", "abcd", ";"));
}
+TEST_F(LexerTest, FindPreviousToken) {
+ Lex("int abcd = 0;\n"
+ "// A comment.\n"
+ "int xyz = abcd;\n");
+ std::vector<std::string> GeneratedByPrevToken;
+ SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
+ while (true) {
+ auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, false);
+ if (!T.has_value())
+ break;
+ GeneratedByPrevToken.push_back(getSourceText(*T, *T));
+ Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
+ }
+ EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";", "abcd", "=", "xyz", "int",
+ ";", "0", "=", "abcd", "int"));
+}
+
+TEST_F(LexerTest, FindPreviousTokenIncludingComments) {
+ Lex("int abcd = 0;\n"
+ "// A comment.\n"
+ "int xyz = abcd;\n");
+ std::vector<std::string> GeneratedByPrevToken;
+ SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
+ while (true) {
+ auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, true);
+ if (!T.has_value())
+ break;
+ GeneratedByPrevToken.push_back(getSourceText(*T, *T));
+ Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
+ }
+ EXPECT_THAT(GeneratedByPrevToken,
+ ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";",
+ "0", "=", "abcd", "int"));
+}
+
TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
TrivialModuleLoader ModLoader;
auto PP = CreatePP("", ModLoader);
>From 2123fa2149216345580a146f5185e4b32019ddb6 Mon Sep 17 00:00:00 2001
From: Clement Courbet <courbet at google.com>
Date: Mon, 20 Jan 2025 16:12:01 +0000
Subject: [PATCH 2/2] [clang-reorder-fields] Reorder leading comments.
Similarly to https://github.com/llvm/llvm-project/pull/122918, leading
comments are currently not being moved.
```
struct Foo {
// This one is the cool field.
int a;
int b;
};
```
becomes:
```
struct Foo {
// This one is the cool field.
int b;
int a;
};
```
but should be:
```
struct Foo {
int b;
// This one is the cool field.
int a;
};
```
---
.../ReorderFieldsAction.cpp | 24 +++++++++++++++++++
.../test/clang-reorder-fields/Comments.cpp | 11 +++++----
2 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
index 30bc8be1719d5a..aeb7fe90f21752 100644
--- a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
+++ b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
@@ -118,6 +118,29 @@ findMembersUsedInInitExpr(const CXXCtorInitializer *Initializer,
return Results;
}
+/// Returns the start of the leading comments before `Loc`.
+static SourceLocation getStartOfLeadingComment(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ // We consider any leading comment token that is on the same line or
+ // indented similarly to the first comment to be part of the leading comment.
+ const unsigned Line = SM.getPresumedLineNumber(Loc);
+ const unsigned Column = SM.getPresumedColumnNumber(Loc);
+ std::optional<Token> Tok =
+ Lexer::findPreviousToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
+ while (Tok && Tok->is(tok::comment)) {
+ const SourceLocation CommentLoc =
+ Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts);
+ if (SM.getPresumedLineNumber(CommentLoc) != Line &&
+ SM.getPresumedColumnNumber(CommentLoc) != Column) {
+ break;
+ }
+ Loc = CommentLoc;
+ Tok = Lexer::findPreviousToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
+ }
+ return Loc;
+}
+
/// Returns the end of the trailing comments after `Loc`.
static SourceLocation getEndOfTrailingComment(SourceLocation Loc,
const SourceManager &SM,
@@ -159,6 +182,7 @@ static SourceRange getFullFieldSourceRange(const FieldDecl &Field,
if (CurrentToken->is(tok::semi))
break;
}
+ Begin = getStartOfLeadingComment(Begin, SM, LangOpts);
End = getEndOfTrailingComment(End, SM, LangOpts);
return SourceRange(Begin, End);
}
diff --git a/clang-tools-extra/test/clang-reorder-fields/Comments.cpp b/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
index a31b6692c9ac73..8a81fbfc093131 100644
--- a/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
+++ b/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
@@ -1,4 +1,4 @@
-// RUN: clang-reorder-fields -record-name Foo -fields-order e1,e3,e2,a,c,b %s -- | FileCheck %s
+// RUN: clang-reorder-fields -record-name Foo -fields-order c,e1,e3,e2,a,b %s -- | FileCheck %s
class Foo {
int a; // Trailing comment for a.
@@ -12,12 +12,15 @@ class Foo {
int e3 /*c-like*/;
};
-// CHECK: /*c-like*/ int e1;
+// Note: the position of the empty line is somewhat arbitrary.
+
+// CHECK: // Prefix comments for c.
+// CHECK-NEXT: int c;
+// CHECK-NEXT: /*c-like*/ int e1;
// CHECK-NEXT: int e3 /*c-like*/;
+// CHECK-EMPTY:
// CHECK-NEXT: int /*c-like*/ e2;
// CHECK-NEXT: int a; // Trailing comment for a.
-// CHECK-NEXT: // Prefix comments for c.
-// CHECK-NEXT: int c;
// CHECK-NEXT: int b; // Multiline
// CHECK-NEXT: // trailing for b.
More information about the cfe-commits
mailing list