[clang-tools-extra] 1819646 - [clang][refactor] Refactor `findNextTokenIncludingComments` (#123060)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 16 08:06:08 PST 2025


Author: Clement Courbet
Date: 2025-01-16T17:06:05+01:00
New Revision: 18196466238ff25d5c76906645ba1d92f08bd0f7

URL: https://github.com/llvm/llvm-project/commit/18196466238ff25d5c76906645ba1d92f08bd0f7
DIFF: https://github.com/llvm/llvm-project/commit/18196466238ff25d5c76906645ba1d92f08bd0f7.diff

LOG: [clang][refactor] Refactor `findNextTokenIncludingComments` (#123060)

We have two copies of the same code in clang-tidy and
clang-reorder-fields, and those are extremenly similar to
`Lexer::findNextToken`, so just add an extra agument to the latter.

---------

Co-authored-by: cor3ntin <corentinjabot at gmail.com>

Added: 
    

Modified: 
    clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
    clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
    clang-tools-extra/clang-tidy/utils/LexerUtils.h
    clang/include/clang/Lex/Lexer.h
    clang/lib/Lex/Lexer.cpp
    clang/unittests/Lex/LexerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
index 80ee31368fe9a5..30bc8be1719d5a 100644
--- a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
+++ b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
@@ -118,35 +118,6 @@ findMembersUsedInInitExpr(const CXXCtorInitializer *Initializer,
   return Results;
 }
 
-/// Returns the next token after `Loc` (including comment tokens).
-static std::optional<Token> getTokenAfter(SourceLocation Loc,
-                                          const SourceManager &SM,
-                                          const LangOptions &LangOpts) {
-  if (Loc.isMacroID()) {
-    return std::nullopt;
-  }
-  Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
-
-  // Break down the source location.
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
-
-  // Try to load the file buffer.
-  bool InvalidTemp = false;
-  StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
-  if (InvalidTemp)
-    return std::nullopt;
-
-  const char *TokenBegin = File.data() + LocInfo.second;
-
-  Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
-              TokenBegin, File.end());
-  lexer.SetCommentRetentionState(true);
-  // Find the token.
-  Token Tok;
-  lexer.LexFromRawLexer(Tok);
-  return Tok;
-}
-
 /// Returns the end of the trailing comments after `Loc`.
 static SourceLocation getEndOfTrailingComment(SourceLocation Loc,
                                               const SourceManager &SM,
@@ -154,11 +125,12 @@ static SourceLocation getEndOfTrailingComment(SourceLocation Loc,
   // We consider any following comment token that is indented more than the
   // first comment to be part of the trailing comment.
   const unsigned Column = SM.getPresumedColumnNumber(Loc);
-  std::optional<Token> Tok = getTokenAfter(Loc, SM, LangOpts);
+  std::optional<Token> Tok =
+      Lexer::findNextToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
   while (Tok && Tok->is(tok::comment) &&
          SM.getPresumedColumnNumber(Tok->getLocation()) > Column) {
     Loc = Tok->getEndLoc();
-    Tok = getTokenAfter(Loc, SM, LangOpts);
+    Tok = Lexer::findNextToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
   }
   return Loc;
 }

diff  --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index 92c3e0ed7894e1..50da196315d3b3 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -86,29 +86,6 @@ SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
   return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
 }
 
-std::optional<Token>
-findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM,
-                               const LangOptions &LangOpts) {
-  // `Lexer::findNextToken` will ignore comment
-  if (Start.isMacroID())
-    return std::nullopt;
-  Start = Lexer::getLocForEndOfToken(Start, 0, SM, LangOpts);
-  // Break down the source location.
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Start);
-  bool InvalidTemp = false;
-  StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
-  if (InvalidTemp)
-    return std::nullopt;
-  // Lex from the start of the given location.
-  Lexer L(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
-          File.data() + LocInfo.second, File.end());
-  L.SetCommentRetentionState(true);
-  // Find the token.
-  Token Tok;
-  L.LexFromRawLexer(Tok);
-  return Tok;
-}
-
 std::optional<Token>
 findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
                               const LangOptions &LangOpts) {

diff  --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
index ea9bd512b68b8f..afd63885e388ce 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
@@ -89,9 +89,11 @@ SourceLocation findNextAnyTokenKind(SourceLocation Start,
   }
 }
 
-std::optional<Token>
+inline std::optional<Token>
 findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM,
-                               const LangOptions &LangOpts);
+                               const LangOptions &LangOpts) {
+  return Lexer::findNextToken(Start, SM, LangOpts, true);
+}
 
 // Finds next token that's not a comment.
 std::optional<Token> findNextTokenSkippingComments(SourceLocation Start,

diff  --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index b6ecc7e5ded9e2..82a041ea3f848a 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -554,7 +554,8 @@ class Lexer : public PreprocessorLexer {
   /// Returns the next token, or std::nullopt if the location is inside a macro.
   static std::optional<Token> findNextToken(SourceLocation Loc,
                                             const SourceManager &SM,
-                                            const LangOptions &LangOpts);
+                                            const LangOptions &LangOpts,
+                                            bool IncludeComments = false);
 
   /// Checks that the given token is the first token that occurs after
   /// the given location (this excludes comments and whitespace). Returns the

diff  --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 72364500a48f9f..115b6c1606a022 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1323,7 +1323,8 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
 
 std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
                                           const SourceManager &SM,
-                                          const LangOptions &LangOpts) {
+                                          const LangOptions &LangOpts,
+                                          bool IncludeComments) {
   if (Loc.isMacroID()) {
     if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
       return std::nullopt;
@@ -1344,6 +1345,7 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
   // Lex from the start of the given location.
   Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
                                       TokenBegin, File.end());
+  lexer.SetCommentRetentionState(IncludeComments);
   // Find the token.
   Token Tok;
   lexer.LexFromRawLexer(Tok);

diff  --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index aead7fb899d0a8..c897998cabe666 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -603,6 +603,7 @@ TEST_F(LexerTest, CharRangeOffByOne) {
 
 TEST_F(LexerTest, FindNextToken) {
   Lex("int abcd = 0;\n"
+      "// A comment.\n"
       "int xyz = abcd;\n");
   std::vector<std::string> GeneratedByNextToken;
   SourceLocation Loc =
@@ -619,6 +620,26 @@ TEST_F(LexerTest, FindNextToken) {
                                                 "xyz", "=", "abcd", ";"));
 }
 
+TEST_F(LexerTest, FindNextTokenIncludingComments) {
+  Lex("int abcd = 0;\n"
+      "// A comment.\n"
+      "int xyz = abcd;\n");
+  std::vector<std::string> GeneratedByNextToken;
+  SourceLocation Loc =
+      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
+  while (true) {
+    auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts, true);
+    ASSERT_TRUE(T);
+    if (T->is(tok::eof))
+      break;
+    GeneratedByNextToken.push_back(getSourceText(*T, *T));
+    Loc = T->getLocation();
+  }
+  EXPECT_THAT(GeneratedByNextToken,
+              ElementsAre("abcd", "=", "0", ";", "// A comment.", "int", "xyz",
+                          "=", "abcd", ";"));
+}
+
 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
   TrivialModuleLoader ModLoader;
   auto PP = CreatePP("", ModLoader);


        


More information about the cfe-commits mailing list