[clang] [clang-format] extend clang-format directive with options to prevent formatting for one line (PR #118566)

Oleksandr T. via cfe-commits cfe-commits at lists.llvm.org
Wed Dec 4 09:30:36 PST 2024


https://github.com/a-tarasyuk updated https://github.com/llvm/llvm-project/pull/118566

>From 75da343b0bd6e3b0f3219b349f6be4c882947820 Mon Sep 17 00:00:00 2001
From: Oleksandr T <oleksandr.tarasiuk at outlook.com>
Date: Wed, 4 Dec 2024 02:24:12 +0200
Subject: [PATCH 1/4] [clang-format] extend clang-format directive with options
 to prevent formatting for one line

---
 clang/include/clang/Format/Format.h           | 11 +++-
 clang/lib/Format/DefinitionBlockSeparator.cpp |  3 +-
 clang/lib/Format/Format.cpp                   | 59 +++++++++++++------
 clang/lib/Format/FormatTokenLexer.cpp         | 39 +++++++++---
 clang/lib/Format/FormatTokenLexer.h           |  8 ++-
 .../Format/IntegerLiteralSeparatorFixer.cpp   |  4 +-
 clang/lib/Format/SortJavaScriptImports.cpp    | 10 +++-
 clang/lib/Format/TokenAnnotator.cpp           |  4 +-
 clang/unittests/Format/FormatTest.cpp         | 53 +++++++++++++++++
 .../unittests/Format/SortImportsTestJava.cpp  |  9 +++
 10 files changed, 165 insertions(+), 35 deletions(-)

diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 6383934afa2c40..b25d190178247d 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -5620,8 +5620,15 @@ inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
   }
 }
 
-bool isClangFormatOn(StringRef Comment);
-bool isClangFormatOff(StringRef Comment);
+enum class ClangFormatDirective {
+  None,
+  Off,
+  On,
+  OffLine,
+  OffNextLine,
+};
+
+ClangFormatDirective parseClangFormatDirective(StringRef Comment);
 
 } // end namespace format
 } // end namespace clang
diff --git a/clang/lib/Format/DefinitionBlockSeparator.cpp b/clang/lib/Format/DefinitionBlockSeparator.cpp
index 319236d3bd618c..709ad5e776cc5a 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.cpp
+++ b/clang/lib/Format/DefinitionBlockSeparator.cpp
@@ -144,7 +144,8 @@ void DefinitionBlockSeparator::separateBlocks(
         return false;
 
       if (const auto *Tok = OperateLine->First;
-          Tok->is(tok::comment) && !isClangFormatOn(Tok->TokenText)) {
+          Tok->is(tok::comment) && parseClangFormatDirective(Tok->TokenText) ==
+                                       ClangFormatDirective::None) {
         return true;
       }
 
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index dcaac4b0d42cc5..11802e8f5b3738 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -3266,10 +3266,11 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
       FormattingOff = false;
 
     bool IsBlockComment = false;
+    ClangFormatDirective CFD = parseClangFormatDirective(Trimmed);
 
-    if (isClangFormatOff(Trimmed)) {
+    if (CFD == ClangFormatDirective::Off) {
       FormattingOff = true;
-    } else if (isClangFormatOn(Trimmed)) {
+    } else if (CFD == ClangFormatDirective::On) {
       FormattingOff = false;
     } else if (Trimmed.starts_with("/*")) {
       IsBlockComment = true;
@@ -3452,9 +3453,10 @@ tooling::Replacements sortJavaImports(const FormatStyle &Style, StringRef Code,
         Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
 
     StringRef Trimmed = Line.trim();
-    if (isClangFormatOff(Trimmed))
+    ClangFormatDirective CFD = parseClangFormatDirective(Trimmed);
+    if (CFD == ClangFormatDirective::Off)
       FormattingOff = true;
-    else if (isClangFormatOn(Trimmed))
+    else if (CFD == ClangFormatDirective::On)
       FormattingOff = false;
 
     if (ImportRegex.match(Line, &Matches)) {
@@ -4190,24 +4192,45 @@ Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,
   return FallbackStyle;
 }
 
-static bool isClangFormatOnOff(StringRef Comment, bool On) {
-  if (Comment == (On ? "/* clang-format on */" : "/* clang-format off */"))
-    return true;
+static unsigned skipWhitespace(unsigned Pos, StringRef Str, unsigned Length) {
+  while (Pos < Length && isspace(Str[Pos]))
+    ++Pos;
+  return Pos;
+}
 
-  static const char ClangFormatOn[] = "// clang-format on";
-  static const char ClangFormatOff[] = "// clang-format off";
-  const unsigned Size = (On ? sizeof ClangFormatOn : sizeof ClangFormatOff) - 1;
+ClangFormatDirective parseClangFormatDirective(StringRef Comment) {
+  size_t Pos = std::min(Comment.find("/*"), Comment.find("//"));
+  unsigned Length = Comment.size();
+  if (Pos == StringRef::npos)
+    return ClangFormatDirective::None;
 
-  return Comment.starts_with(On ? ClangFormatOn : ClangFormatOff) &&
-         (Comment.size() == Size || Comment[Size] == ':');
-}
+  Pos = skipWhitespace(Pos + 2, Comment, Length);
+  StringRef ClangFormatDirectiveName = "clang-format";
 
-bool isClangFormatOn(StringRef Comment) {
-  return isClangFormatOnOff(Comment, /*On=*/true);
-}
+  if (Comment.substr(Pos, ClangFormatDirectiveName.size()) ==
+      ClangFormatDirectiveName) {
+    Pos =
+        skipWhitespace(Pos + ClangFormatDirectiveName.size(), Comment, Length);
+
+    unsigned EndDirectiveValuePos = Pos;
+    while (EndDirectiveValuePos < Length) {
+      char Char = Comment[EndDirectiveValuePos];
+      if (isspace(Char) || Char == '*' || Char == ':')
+        break;
+
+      ++EndDirectiveValuePos;
+    }
+
+    return llvm::StringSwitch<ClangFormatDirective>(
+               Comment.substr(Pos, EndDirectiveValuePos - Pos))
+        .Case("off", ClangFormatDirective::Off)
+        .Case("on", ClangFormatDirective::On)
+        .Case("off-line", ClangFormatDirective::OffLine)
+        .Case("off-next-line", ClangFormatDirective::OffNextLine)
+        .Default(ClangFormatDirective::None);
+  }
 
-bool isClangFormatOff(StringRef Comment) {
-  return isClangFormatOnOff(Comment, /*On=*/false);
+  return ClangFormatDirective::None;
 }
 
 } // namespace format
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 7a264bddcdfe19..1bcb4acd7e8ad6 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -32,7 +32,8 @@ FormatTokenLexer::FormatTokenLexer(
       LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID),
       Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
       Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
-      FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
+      FDS(FormatDisableState::None),
+      MacroBlockBeginRegex(Style.MacroBlockBegin),
       MacroBlockEndRegex(Style.MacroBlockEnd) {
   Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));
   Lex->SetKeepWhitespaceMode(true);
@@ -82,7 +83,23 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
   assert(Tokens.empty());
   assert(FirstInLineIndex == 0);
   do {
-    Tokens.push_back(getNextToken());
+    FormatToken *NextToken = getNextToken();
+
+    if (FDS == FormatDisableState::None && NextToken->is(tok::comment) &&
+        parseClangFormatDirective(NextToken->TokenText) ==
+            ClangFormatDirective::OffLine) {
+      for (unsigned i = FirstInLineIndex; i < Tokens.size(); ++i)
+        Tokens[i]->Finalized = true;
+    }
+
+    if (Tokens.size() >= 1 && Tokens.back()->isNot(tok::comment) &&
+        FDS == FormatDisableState::SingleLine &&
+        (NextToken->NewlinesBefore > 0 || NextToken->IsMultiline)) {
+      FDS = FormatDisableState::None;
+    }
+
+    Tokens.push_back(NextToken);
+
     if (Style.isJavaScript()) {
       tryParseJSRegexLiteral();
       handleTemplateStrings();
@@ -1450,13 +1467,21 @@ void FormatTokenLexer::readRawToken(FormatToken &Tok) {
   if ((Style.isJavaScript() || Style.isProto()) && Tok.is(tok::char_constant))
     Tok.Tok.setKind(tok::string_literal);
 
-  if (Tok.is(tok::comment) && isClangFormatOn(Tok.TokenText))
-    FormattingDisabled = false;
+  if (Tok.is(tok::comment) &&
+      parseClangFormatDirective(Tok.TokenText) == ClangFormatDirective::On) {
+    FDS = FormatDisableState::None;
+  }
+
+  Tok.Finalized = FDS != FormatDisableState::None;
 
-  Tok.Finalized = FormattingDisabled;
+  if (Tok.is(tok::comment)) {
+    ClangFormatDirective FSD = parseClangFormatDirective(Tok.TokenText);
+    if (FSD == ClangFormatDirective::Off)
+      FDS = FormatDisableState::Range;
 
-  if (Tok.is(tok::comment) && isClangFormatOff(Tok.TokenText))
-    FormattingDisabled = true;
+    if (FSD == ClangFormatDirective::OffNextLine)
+      FDS = FormatDisableState::SingleLine;
+  }
 }
 
 void FormatTokenLexer::resetLexer(unsigned Offset) {
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 71389d2ade2b73..3d3338357bab05 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -32,6 +32,12 @@ enum LexerState {
   TOKEN_STASHED,
 };
 
+enum class FormatDisableState {
+  None,
+  SingleLine,
+  Range,
+};
+
 class FormatTokenLexer {
 public:
   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
@@ -131,7 +137,7 @@ class FormatTokenLexer {
 
   llvm::SmallPtrSet<IdentifierInfo *, 8> TemplateNames, TypeNames;
 
-  bool FormattingDisabled;
+  FormatDisableState FDS;
 
   llvm::Regex MacroBlockBeginRegex;
   llvm::Regex MacroBlockEndRegex;
diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 87823ae32b1138..e93a8a84561002 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -93,9 +93,9 @@ IntegerLiteralSeparatorFixer::process(const Environment &Env,
     auto Location = Tok.getLocation();
     auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
     if (Tok.is(tok::comment)) {
-      if (isClangFormatOff(Text))
+      if (parseClangFormatDirective(Text) == ClangFormatDirective::Off)
         Skip = true;
-      else if (isClangFormatOn(Text))
+      else if (parseClangFormatDirective(Text) == ClangFormatDirective::On)
         Skip = false;
       continue;
     }
diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp
index 1acce26ff2795e..f5038dd2080094 100644
--- a/clang/lib/Format/SortJavaScriptImports.cpp
+++ b/clang/lib/Format/SortJavaScriptImports.cpp
@@ -194,7 +194,9 @@ class JavaScriptImportSorter : public TokenAnalyzer {
     // Separate references from the main code body of the file.
     if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2 &&
         !(FirstNonImportLine->First->is(tok::comment) &&
-          isClangFormatOn(FirstNonImportLine->First->TokenText.trim()))) {
+          parseClangFormatDirective(
+              FirstNonImportLine->First->TokenText.trim()) ==
+              ClangFormatDirective::On)) {
       ReferencesText += "\n";
     }
 
@@ -375,9 +377,11 @@ class JavaScriptImportSorter : public TokenAnalyzer {
       // This is tracked in FormattingOff here and on JsModuleReference.
       while (Current && Current->is(tok::comment)) {
         StringRef CommentText = Current->TokenText.trim();
-        if (isClangFormatOff(CommentText)) {
+        ClangFormatDirective CFD = parseClangFormatDirective(CommentText);
+
+        if (CFD == ClangFormatDirective::Off) {
           FormattingOff = true;
-        } else if (isClangFormatOn(CommentText)) {
+        } else if (CFD == ClangFormatDirective::On) {
           FormattingOff = false;
           // Special case: consider a trailing "clang-format on" line to be part
           // of the module reference, so that it gets moved around together with
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index bc5239209f3aab..d73c7ed46df194 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3559,7 +3559,9 @@ void TokenAnnotator::setCommentLineLevels(
     // If the comment is currently aligned with the line immediately following
     // it, that's probably intentional and we should keep it.
     if (NextNonCommentLine && NextNonCommentLine->First->NewlinesBefore < 2 &&
-        Line->isComment() && !isClangFormatOff(Line->First->TokenText) &&
+        Line->isComment() &&
+        parseClangFormatDirective(Line->First->TokenText) ==
+            ClangFormatDirective::None &&
         NextNonCommentLine->First->OriginalColumn ==
             Line->First->OriginalColumn) {
       const bool PPDirectiveOrImportStmt =
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 250e51b5421664..567c57c821dd69 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -28273,6 +28273,59 @@ TEST_F(FormatTest, KeepFormFeed) {
                Style);
 }
 
+TEST_F(FormatTest, DisableLine) {
+  verifyFormat("int  a  =  1; // clang-format off-line\n"
+               "int b = 1;",
+               "int  a  =  1; // clang-format off-line\n"
+               "int  b  =  1;");
+
+  verifyFormat("int a = 1;\n"
+               "int  b  =  1; // clang-format off-line\n"
+               "int c = 1;",
+               "int  a  =  1;\n"
+               "int  b  =  1; // clang-format off-line\n"
+               "int  c  =  1;");
+
+  verifyFormat("int  a  =  1; /* clang-format off-line */\n"
+               "int b = 1;",
+               "int  a  =  1; /* clang-format off-line */\n"
+               "int  b  =  1;");
+
+  verifyFormat("int a = 1;\n"
+               "int  b  =  1; /* clang-format off-line */\n"
+               "int c = 1;",
+               "int  a  =  1;\n"
+               "int  b  =  1; /* clang-format off-line */\n"
+               "int  c  =  1;");
+}
+
+TEST_F(FormatTest, DisableNextLine) {
+  verifyFormat("// clang-format off-next-line\n"
+               "int  a  =  1;\n"
+               "int b = 1;",
+               "// clang-format off-next-line\n"
+               "int  a  =  1;\n"
+               "int  b  =  1;");
+
+  verifyFormat("// clang-format off-next-line\n"
+               "\n"
+               "\n"
+               "int  a  =  1;\n"
+               "int b = 1;",
+               "// clang-format off-next-line\n"
+               "\n"
+               "\n"
+               "int  a  =  1;\n"
+               "int  b  =  1;");
+
+  verifyFormat("/* clang-format off-next-line */\n"
+               "int  a  =  1;\n"
+               "int b = 1;",
+               "/* clang-format off-next-line */\n"
+               "int  a  =  1;\n"
+               "int  b  =  1;");
+}
+
 } // namespace
 } // namespace test
 } // namespace format
diff --git a/clang/unittests/Format/SortImportsTestJava.cpp b/clang/unittests/Format/SortImportsTestJava.cpp
index d577efa34f86e8..c77e1661300fb5 100644
--- a/clang/unittests/Format/SortImportsTestJava.cpp
+++ b/clang/unittests/Format/SortImportsTestJava.cpp
@@ -235,6 +235,15 @@ TEST_F(SortImportsTestJava, FormatTotallyOn) {
                  "import org.a;"));
 }
 
+TEST_F(SortImportsTestJava, DisableLine) {
+  EXPECT_EQ("// clang-format on\n"
+            "import org.a;\n"
+            "import org.b;",
+            sort("// clang-format on\n"
+                 "import org.b;\n"
+                 "import org.a;"));
+}
+
 TEST_F(SortImportsTestJava, FormatPariallyOnShouldNotReorder) {
   EXPECT_EQ("// clang-format off\n"
             "import org.b;\n"

>From f3b7d9c92cdb42fe0fb80ad7f51ab2a5985ba89e Mon Sep 17 00:00:00 2001
From: Oleksandr T <oleksandr.tarasiuk at outlook.com>
Date: Wed, 4 Dec 2024 15:51:18 +0200
Subject: [PATCH 2/4] update release notes

---
 clang/docs/ReleaseNotes.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 755418e9550cf4..1d7fddb2236740 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -997,6 +997,7 @@ clang-format
   ``Never``, and ``true`` to ``Always``.
 - Adds ``RemoveEmptyLinesInUnwrappedLines`` option.
 - Adds ``KeepFormFeed`` option and set it to ``true`` for ``GNU`` style.
+- Adds ``off-line`` and ``off-next-line`` options to the ``clang-format`` directive
 
 libclang
 --------

>From 96c7dbefb893216d2c672871b88fae0080c01d9a Mon Sep 17 00:00:00 2001
From: Oleksandr T <oleksandr.tarasiuk at outlook.com>
Date: Wed, 4 Dec 2024 15:57:27 +0200
Subject: [PATCH 3/4] fix typo

---
 clang/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1d7fddb2236740..a621f63c2be470 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -997,7 +997,7 @@ clang-format
   ``Never``, and ``true`` to ``Always``.
 - Adds ``RemoveEmptyLinesInUnwrappedLines`` option.
 - Adds ``KeepFormFeed`` option and set it to ``true`` for ``GNU`` style.
-- Adds ``off-line`` and ``off-next-line`` options to the ``clang-format`` directive
+- Adds ``off-line`` and ``off-next-line`` options to the ``clang-format`` directive.
 
 libclang
 --------

>From 511356a9ba721b25c994e5dac0d83a6b71c078f9 Mon Sep 17 00:00:00 2001
From: Oleksandr T <oleksandr.tarasiuk at outlook.com>
Date: Wed, 4 Dec 2024 19:30:21 +0200
Subject: [PATCH 4/4] cleanup

---
 clang/unittests/Format/SortImportsTestJava.cpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/clang/unittests/Format/SortImportsTestJava.cpp b/clang/unittests/Format/SortImportsTestJava.cpp
index c77e1661300fb5..d577efa34f86e8 100644
--- a/clang/unittests/Format/SortImportsTestJava.cpp
+++ b/clang/unittests/Format/SortImportsTestJava.cpp
@@ -235,15 +235,6 @@ TEST_F(SortImportsTestJava, FormatTotallyOn) {
                  "import org.a;"));
 }
 
-TEST_F(SortImportsTestJava, DisableLine) {
-  EXPECT_EQ("// clang-format on\n"
-            "import org.a;\n"
-            "import org.b;",
-            sort("// clang-format on\n"
-                 "import org.b;\n"
-                 "import org.a;"));
-}
-
 TEST_F(SortImportsTestJava, FormatPariallyOnShouldNotReorder) {
   EXPECT_EQ("// clang-format off\n"
             "import org.b;\n"



More information about the cfe-commits mailing list