[clang] [clang-tools-extra] [clang-reorder-fields] Reorder leading comments (PR #123740)

Tue Jan 21 04:12:36 PST 2025

https://github.com/legrosbuffle created https://github.com/llvm/llvm-project/pull/123740

Similarly to https://github.com/llvm/llvm-project/pull/122918, leading
comments are currently not being moved.

```
struct Foo {
  // This one is the cool field.
  int a;
  int b;
};
```

becomes:

```
struct Foo {
  // This one is the cool field.
  int b;
  int a;
};
```

but should be:

```
struct Foo {
  int b;
  // This one is the cool field.
  int a;
};
```

>From 4146793e3a239663495f86ecf9df9245195c5cca Mon Sep 17 00:00:00 2001
From: Clement Courbet <courbet at google.com>
Date: Thu, 16 Jan 2025 07:34:13 +0000
Subject: [PATCH 1/2] [clang][refactor] Move utilities from
 `clang_tidy::LexerUtils` to `clang::Lexer`

To make them more widely available and reusable.

Add unit tests.
---
 .../clang-tidy/utils/LexerUtils.cpp           | 25 +++++--------
 clang/include/clang/Lex/Lexer.h               |  6 ++++
 clang/lib/Lex/Lexer.cpp                       | 21 +++++++++++
 clang/unittests/Lex/LexerTest.cpp             | 35 +++++++++++++++++++
 4 files changed, 70 insertions(+), 17 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index 50da196315d3b3..c14d341caf7794 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -17,25 +17,16 @@ namespace clang::tidy::utils::lexer {
 std::pair<Token, SourceLocation>
 getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
                          const LangOptions &LangOpts, bool SkipComments) {
-  Token Token;
-  Token.setKind(tok::unknown);
+  const std::optional<Token> Tok =
+      Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
 
-  Location = Location.getLocWithOffset(-1);
-  if (Location.isInvalid())
-    return {Token, Location};
-
-  const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
-  while (Location != StartOfFile) {
-    Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
-    if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
-        (!SkipComments || !Token.is(tok::comment))) {
-      break;
-    }
-    if (Location == StartOfFile)
-      return {Token, Location};
-    Location = Location.getLocWithOffset(-1);
+  if (Tok.has_value()) {
+    return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
   }
-  return {Token, Location};
+
+  Token Token;
+  Token.setKind(tok::unknown);
+  return {Token, SourceLocation()};
 }
 
 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 82a041ea3f848a..89c8ae354dafc1 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -557,6 +557,12 @@ class Lexer : public PreprocessorLexer {
                                             const LangOptions &LangOpts,
                                             bool IncludeComments = false);
 
+  /// Finds the token that comes before the given location.
+  static std::optional<Token> findPreviousToken(SourceLocation Loc,
+                                                const SourceManager &SM,
+                                                const LangOptions &LangOpts,
+                                                bool IncludeComments);
+
   /// Checks that the given token is the first token that occurs after
   /// the given location (this excludes comments and whitespace). Returns the
   /// location immediately after the specified token. If the token is not found
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 115b6c1606a022..087c6f13aea66a 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1352,6 +1352,27 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
   return Tok;
 }
 
+std::optional<Token> Lexer::findPreviousToken(SourceLocation Loc,
+                                              const SourceManager &SM,
+                                              const LangOptions &LangOpts,
+                                              bool IncludeComments) {
+  const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Loc));
+  while (Loc != StartOfFile) {
+    Loc = Loc.getLocWithOffset(-1);
+    if (Loc.isInvalid())
+      return std::nullopt;
+
+    Loc = GetBeginningOfToken(Loc, SM, LangOpts);
+    Token Tok;
+    if (getRawToken(Loc, Tok, SM, LangOpts))
+      continue; // Not a token, go to prev location.
+    if (!Tok.is(tok::comment) || IncludeComments) {
+      return Tok;
+    }
+  }
+  return std::nullopt;
+}
+
 /// Checks that the given token is the first token that occurs after the
 /// given location (this excludes comments and whitespace). Returns the location
 /// immediately after the specified token. If the token is not found or the
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index c897998cabe666..29c61fee6f5315 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -640,6 +640,41 @@ TEST_F(LexerTest, FindNextTokenIncludingComments) {
                           "=", "abcd", ";"));
 }
 
+TEST_F(LexerTest, FindPreviousToken) {
+  Lex("int abcd = 0;\n"
+      "// A comment.\n"
+      "int xyz = abcd;\n");
+  std::vector<std::string> GeneratedByPrevToken;
+  SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
+  while (true) {
+    auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, false);
+    if (!T.has_value())
+      break;
+    GeneratedByPrevToken.push_back(getSourceText(*T, *T));
+    Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
+  }
+  EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";", "abcd", "=", "xyz", "int",
+                                                ";", "0", "=", "abcd", "int"));
+}
+
+TEST_F(LexerTest, FindPreviousTokenIncludingComments) {
+  Lex("int abcd = 0;\n"
+      "// A comment.\n"
+      "int xyz = abcd;\n");
+  std::vector<std::string> GeneratedByPrevToken;
+  SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
+  while (true) {
+    auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, true);
+    if (!T.has_value())
+      break;
+    GeneratedByPrevToken.push_back(getSourceText(*T, *T));
+    Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
+  }
+  EXPECT_THAT(GeneratedByPrevToken,
+              ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";",
+                          "0", "=", "abcd", "int"));
+}
+
 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
   TrivialModuleLoader ModLoader;
   auto PP = CreatePP("", ModLoader);

>From 2123fa2149216345580a146f5185e4b32019ddb6 Mon Sep 17 00:00:00 2001
From: Clement Courbet <courbet at google.com>
Date: Mon, 20 Jan 2025 16:12:01 +0000
Subject: [PATCH 2/2] [clang-reorder-fields] Reorder leading comments.

Similarly to https://github.com/llvm/llvm-project/pull/122918, leading
comments are currently not being moved.

```
struct Foo {
  // This one is the cool field.
  int a;
  int b;
};
```

becomes:

```
struct Foo {
  // This one is the cool field.
  int b;
  int a;
};
```

but should be:

```
struct Foo {
  int b;
  // This one is the cool field.
  int a;
};
```
---
 .../ReorderFieldsAction.cpp                   | 24 +++++++++++++++++++
 .../test/clang-reorder-fields/Comments.cpp    | 11 +++++----
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
index 30bc8be1719d5a..aeb7fe90f21752 100644
--- a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
+++ b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
@@ -118,6 +118,29 @@ findMembersUsedInInitExpr(const CXXCtorInitializer *Initializer,
   return Results;
 }
 
+/// Returns the start of the leading comments before `Loc`.
+static SourceLocation getStartOfLeadingComment(SourceLocation Loc,
+                                               const SourceManager &SM,
+                                               const LangOptions &LangOpts) {
+  // We consider any leading comment token that is on the same line or
+  // indented similarly to the first comment to be part of the leading comment.
+  const unsigned Line = SM.getPresumedLineNumber(Loc);
+  const unsigned Column = SM.getPresumedColumnNumber(Loc);
+  std::optional<Token> Tok =
+      Lexer::findPreviousToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
+  while (Tok && Tok->is(tok::comment)) {
+    const SourceLocation CommentLoc =
+        Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts);
+    if (SM.getPresumedLineNumber(CommentLoc) != Line &&
+        SM.getPresumedColumnNumber(CommentLoc) != Column) {
+      break;
+    }
+    Loc = CommentLoc;
+    Tok = Lexer::findPreviousToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
+  }
+  return Loc;
+}
+
 /// Returns the end of the trailing comments after `Loc`.
 static SourceLocation getEndOfTrailingComment(SourceLocation Loc,
                                               const SourceManager &SM,
@@ -159,6 +182,7 @@ static SourceRange getFullFieldSourceRange(const FieldDecl &Field,
     if (CurrentToken->is(tok::semi))
       break;
   }
+  Begin = getStartOfLeadingComment(Begin, SM, LangOpts);
   End = getEndOfTrailingComment(End, SM, LangOpts);
   return SourceRange(Begin, End);
 }
diff --git a/clang-tools-extra/test/clang-reorder-fields/Comments.cpp b/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
index a31b6692c9ac73..8a81fbfc093131 100644
--- a/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
+++ b/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
@@ -1,4 +1,4 @@
-// RUN: clang-reorder-fields -record-name Foo -fields-order e1,e3,e2,a,c,b %s -- | FileCheck %s
+// RUN: clang-reorder-fields -record-name Foo -fields-order c,e1,e3,e2,a,b %s -- | FileCheck %s
 
 class Foo {
   int a; // Trailing comment for a.
@@ -12,12 +12,15 @@ class Foo {
   int e3 /*c-like*/;
 };
 
-// CHECK:       /*c-like*/ int e1;
+// Note: the position of the empty line is somewhat arbitrary.
+
+// CHECK:       // Prefix comments for c.
+// CHECK-NEXT:  int c;
+// CHECK-NEXT:  /*c-like*/ int e1;
 // CHECK-NEXT:  int e3 /*c-like*/;
+// CHECK-EMPTY:
 // CHECK-NEXT:  int /*c-like*/ e2;
 // CHECK-NEXT:  int a; // Trailing comment for a.
-// CHECK-NEXT:  // Prefix comments for c.
-// CHECK-NEXT:  int c;
 // CHECK-NEXT:  int b; // Multiline
 // CHECK-NEXT:         // trailing for b.