[clang] [clang-format] extend clang-format directive with options to prevent formatting for one line (PR #118566)
Oleksandr T. via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 3 16:27:27 PST 2024
https://github.com/a-tarasyuk created https://github.com/llvm/llvm-project/pull/118566
Fixes #54334
>From 8bae39299284dffd592bbf32374929e4a6f2d3ff Mon Sep 17 00:00:00 2001
From: Oleksandr T <oleksandr.tarasiuk at outlook.com>
Date: Wed, 4 Dec 2024 02:24:12 +0200
Subject: [PATCH] [clang-format] extend clang-format directive with options to
prevent formatting for one line
---
clang/include/clang/Format/Format.h | 11 +++-
clang/lib/Format/DefinitionBlockSeparator.cpp | 3 +-
clang/lib/Format/Format.cpp | 57 +++++++++++++------
clang/lib/Format/FormatTokenLexer.cpp | 39 ++++++++++---
clang/lib/Format/FormatTokenLexer.h | 8 ++-
.../Format/IntegerLiteralSeparatorFixer.cpp | 4 +-
clang/lib/Format/SortJavaScriptImports.cpp | 10 +++-
clang/lib/Format/TokenAnnotator.cpp | 4 +-
clang/unittests/Format/FormatTest.cpp | 53 +++++++++++++++++
9 files changed, 154 insertions(+), 35 deletions(-)
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 6383934afa2c40..b25d190178247d 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -5620,8 +5620,15 @@ inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
}
}
-bool isClangFormatOn(StringRef Comment);
-bool isClangFormatOff(StringRef Comment);
+enum class ClangFormatDirective {
+ None,
+ Off,
+ On,
+ OffLine,
+ OffNextLine,
+};
+
+ClangFormatDirective parseClangFormatDirective(StringRef Comment);
} // end namespace format
} // end namespace clang
diff --git a/clang/lib/Format/DefinitionBlockSeparator.cpp b/clang/lib/Format/DefinitionBlockSeparator.cpp
index 319236d3bd618c..709ad5e776cc5a 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.cpp
+++ b/clang/lib/Format/DefinitionBlockSeparator.cpp
@@ -144,7 +144,8 @@ void DefinitionBlockSeparator::separateBlocks(
return false;
if (const auto *Tok = OperateLine->First;
- Tok->is(tok::comment) && !isClangFormatOn(Tok->TokenText)) {
+ Tok->is(tok::comment) && parseClangFormatDirective(Tok->TokenText) ==
+ ClangFormatDirective::None) {
return true;
}
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index dcaac4b0d42cc5..e82ac41e652839 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -3267,9 +3267,9 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
bool IsBlockComment = false;
- if (isClangFormatOff(Trimmed)) {
+ if (parseClangFormatDirective(Trimmed) == ClangFormatDirective::Off) {
FormattingOff = true;
- } else if (isClangFormatOn(Trimmed)) {
+ } else if (parseClangFormatDirective(Trimmed) == ClangFormatDirective::On) {
FormattingOff = false;
} else if (Trimmed.starts_with("/*")) {
IsBlockComment = true;
@@ -3452,9 +3452,9 @@ tooling::Replacements sortJavaImports(const FormatStyle &Style, StringRef Code,
Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
StringRef Trimmed = Line.trim();
- if (isClangFormatOff(Trimmed))
+ if (parseClangFormatDirective(Trimmed) == ClangFormatDirective::Off)
FormattingOff = true;
- else if (isClangFormatOn(Trimmed))
+ else if (parseClangFormatDirective(Trimmed) == ClangFormatDirective::On)
FormattingOff = false;
if (ImportRegex.match(Line, &Matches)) {
@@ -4190,24 +4190,45 @@ Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,
return FallbackStyle;
}
-static bool isClangFormatOnOff(StringRef Comment, bool On) {
- if (Comment == (On ? "/* clang-format on */" : "/* clang-format off */"))
- return true;
+static unsigned skipWhitespace(unsigned Pos, StringRef Str, unsigned Length) {
+ while (Pos < Length && isspace(Str[Pos]))
+ ++Pos;
+ return Pos;
+}
- static const char ClangFormatOn[] = "// clang-format on";
- static const char ClangFormatOff[] = "// clang-format off";
- const unsigned Size = (On ? sizeof ClangFormatOn : sizeof ClangFormatOff) - 1;
+ClangFormatDirective parseClangFormatDirective(StringRef Comment) {
+ size_t Pos = std::min(Comment.find("/*"), Comment.find("//"));
+ unsigned Length = Comment.size();
+ if (Pos == StringRef::npos)
+ return ClangFormatDirective::None;
- return Comment.starts_with(On ? ClangFormatOn : ClangFormatOff) &&
- (Comment.size() == Size || Comment[Size] == ':');
-}
+ Pos = skipWhitespace(Pos + 2, Comment, Length);
+ StringRef ClangFormatDirectiveName = "clang-format";
-bool isClangFormatOn(StringRef Comment) {
- return isClangFormatOnOff(Comment, /*On=*/true);
-}
+ if (Comment.substr(Pos, ClangFormatDirectiveName.size()) ==
+ ClangFormatDirectiveName) {
+ Pos =
+ skipWhitespace(Pos + ClangFormatDirectiveName.size(), Comment, Length);
+
+ unsigned EndDirectiveValuePos = Pos;
+ while (EndDirectiveValuePos < Length) {
+ char Char = Comment[EndDirectiveValuePos];
+ if (isspace(Char) || Char == '*')
+ break;
+
+ ++EndDirectiveValuePos;
+ }
+
+ return llvm::StringSwitch<ClangFormatDirective>(
+ Comment.substr(Pos, EndDirectiveValuePos - Pos))
+ .Cases("off", "off:", ClangFormatDirective::Off)
+ .Cases("on", "on:", ClangFormatDirective::On)
+ .Case("off-line", ClangFormatDirective::OffLine)
+ .Case("off-next-line", ClangFormatDirective::OffNextLine)
+ .Default(ClangFormatDirective::None);
+ }
-bool isClangFormatOff(StringRef Comment) {
- return isClangFormatOnOff(Comment, /*On=*/false);
+ return ClangFormatDirective::None;
}
} // namespace format
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 7a264bddcdfe19..1bcb4acd7e8ad6 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -32,7 +32,8 @@ FormatTokenLexer::FormatTokenLexer(
LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID),
Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
- FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
+ FDS(FormatDisableState::None),
+ MacroBlockBeginRegex(Style.MacroBlockBegin),
MacroBlockEndRegex(Style.MacroBlockEnd) {
Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));
Lex->SetKeepWhitespaceMode(true);
@@ -82,7 +83,23 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
assert(Tokens.empty());
assert(FirstInLineIndex == 0);
do {
- Tokens.push_back(getNextToken());
+ FormatToken *NextToken = getNextToken();
+
+ if (FDS == FormatDisableState::None && NextToken->is(tok::comment) &&
+ parseClangFormatDirective(NextToken->TokenText) ==
+ ClangFormatDirective::OffLine) {
+ for (unsigned i = FirstInLineIndex; i < Tokens.size(); ++i)
+ Tokens[i]->Finalized = true;
+ }
+
+ if (Tokens.size() >= 1 && Tokens.back()->isNot(tok::comment) &&
+ FDS == FormatDisableState::SingleLine &&
+ (NextToken->NewlinesBefore > 0 || NextToken->IsMultiline)) {
+ FDS = FormatDisableState::None;
+ }
+
+ Tokens.push_back(NextToken);
+
if (Style.isJavaScript()) {
tryParseJSRegexLiteral();
handleTemplateStrings();
@@ -1450,13 +1467,21 @@ void FormatTokenLexer::readRawToken(FormatToken &Tok) {
if ((Style.isJavaScript() || Style.isProto()) && Tok.is(tok::char_constant))
Tok.Tok.setKind(tok::string_literal);
- if (Tok.is(tok::comment) && isClangFormatOn(Tok.TokenText))
- FormattingDisabled = false;
+ if (Tok.is(tok::comment) &&
+ parseClangFormatDirective(Tok.TokenText) == ClangFormatDirective::On) {
+ FDS = FormatDisableState::None;
+ }
+
+ Tok.Finalized = FDS != FormatDisableState::None;
- Tok.Finalized = FormattingDisabled;
+ if (Tok.is(tok::comment)) {
+ ClangFormatDirective FSD = parseClangFormatDirective(Tok.TokenText);
+ if (FSD == ClangFormatDirective::Off)
+ FDS = FormatDisableState::Range;
- if (Tok.is(tok::comment) && isClangFormatOff(Tok.TokenText))
- FormattingDisabled = true;
+ if (FSD == ClangFormatDirective::OffNextLine)
+ FDS = FormatDisableState::SingleLine;
+ }
}
void FormatTokenLexer::resetLexer(unsigned Offset) {
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 71389d2ade2b73..3d3338357bab05 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -32,6 +32,12 @@ enum LexerState {
TOKEN_STASHED,
};
+enum class FormatDisableState {
+ None,
+ SingleLine,
+ Range,
+};
+
class FormatTokenLexer {
public:
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
@@ -131,7 +137,7 @@ class FormatTokenLexer {
llvm::SmallPtrSet<IdentifierInfo *, 8> TemplateNames, TypeNames;
- bool FormattingDisabled;
+ FormatDisableState FDS;
llvm::Regex MacroBlockBeginRegex;
llvm::Regex MacroBlockEndRegex;
diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 87823ae32b1138..e93a8a84561002 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -93,9 +93,9 @@ IntegerLiteralSeparatorFixer::process(const Environment &Env,
auto Location = Tok.getLocation();
auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
if (Tok.is(tok::comment)) {
- if (isClangFormatOff(Text))
+ if (parseClangFormatDirective(Text) == ClangFormatDirective::Off)
Skip = true;
- else if (isClangFormatOn(Text))
+ else if (parseClangFormatDirective(Text) == ClangFormatDirective::On)
Skip = false;
continue;
}
diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp
index 1acce26ff2795e..69ebe080f8f8a6 100644
--- a/clang/lib/Format/SortJavaScriptImports.cpp
+++ b/clang/lib/Format/SortJavaScriptImports.cpp
@@ -194,7 +194,9 @@ class JavaScriptImportSorter : public TokenAnalyzer {
// Separate references from the main code body of the file.
if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2 &&
!(FirstNonImportLine->First->is(tok::comment) &&
- isClangFormatOn(FirstNonImportLine->First->TokenText.trim()))) {
+ parseClangFormatDirective(
+ FirstNonImportLine->First->TokenText.trim()) ==
+ ClangFormatDirective::On)) {
ReferencesText += "\n";
}
@@ -375,9 +377,11 @@ class JavaScriptImportSorter : public TokenAnalyzer {
// This is tracked in FormattingOff here and on JsModuleReference.
while (Current && Current->is(tok::comment)) {
StringRef CommentText = Current->TokenText.trim();
- if (isClangFormatOff(CommentText)) {
+ if (parseClangFormatDirective(CommentText) ==
+ ClangFormatDirective::Off) {
FormattingOff = true;
- } else if (isClangFormatOn(CommentText)) {
+ } else if (parseClangFormatDirective(CommentText) ==
+ ClangFormatDirective::On) {
FormattingOff = false;
// Special case: consider a trailing "clang-format on" line to be part
// of the module reference, so that it gets moved around together with
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index bc5239209f3aab..d73c7ed46df194 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3559,7 +3559,9 @@ void TokenAnnotator::setCommentLineLevels(
// If the comment is currently aligned with the line immediately following
// it, that's probably intentional and we should keep it.
if (NextNonCommentLine && NextNonCommentLine->First->NewlinesBefore < 2 &&
- Line->isComment() && !isClangFormatOff(Line->First->TokenText) &&
+ Line->isComment() &&
+ parseClangFormatDirective(Line->First->TokenText) ==
+ ClangFormatDirective::None &&
NextNonCommentLine->First->OriginalColumn ==
Line->First->OriginalColumn) {
const bool PPDirectiveOrImportStmt =
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 250e51b5421664..567c57c821dd69 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -28273,6 +28273,59 @@ TEST_F(FormatTest, KeepFormFeed) {
Style);
}
+TEST_F(FormatTest, DisableLine) {
+ verifyFormat("int a = 1; // clang-format off-line\n"
+ "int b = 1;",
+ "int a = 1; // clang-format off-line\n"
+ "int b = 1;");
+
+ verifyFormat("int a = 1;\n"
+ "int b = 1; // clang-format off-line\n"
+ "int c = 1;",
+ "int a = 1;\n"
+ "int b = 1; // clang-format off-line\n"
+ "int c = 1;");
+
+ verifyFormat("int a = 1; /* clang-format off-line */\n"
+ "int b = 1;",
+ "int a = 1; /* clang-format off-line */\n"
+ "int b = 1;");
+
+ verifyFormat("int a = 1;\n"
+ "int b = 1; /* clang-format off-line */\n"
+ "int c = 1;",
+ "int a = 1;\n"
+ "int b = 1; /* clang-format off-line */\n"
+ "int c = 1;");
+}
+
+TEST_F(FormatTest, DisableNextLine) {
+ verifyFormat("// clang-format off-next-line\n"
+ "int a = 1;\n"
+ "int b = 1;",
+ "// clang-format off-next-line\n"
+ "int a = 1;\n"
+ "int b = 1;");
+
+ verifyFormat("// clang-format off-next-line\n"
+ "\n"
+ "\n"
+ "int a = 1;\n"
+ "int b = 1;",
+ "// clang-format off-next-line\n"
+ "\n"
+ "\n"
+ "int a = 1;\n"
+ "int b = 1;");
+
+ verifyFormat("/* clang-format off-next-line */\n"
+ "int a = 1;\n"
+ "int b = 1;",
+ "/* clang-format off-next-line */\n"
+ "int a = 1;\n"
+ "int b = 1;");
+}
+
} // namespace
} // namespace test
} // namespace format
More information about the cfe-commits
mailing list