[clang] [clang-format] Recognize TableGen paste operator on separate line (PR #133722)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 4 07:27:46 PDT 2025
https://github.com/sstwcw updated https://github.com/llvm/llvm-project/pull/133722
>From b018450ed1a2901dd5b8cd5e146c1ce5d8613c86 Mon Sep 17 00:00:00 2001
From: sstwcw <su3e8a96kzlver at posteo.net>
Date: Mon, 31 Mar 2025 13:50:02 +0000
Subject: [PATCH 1/3] [clang-format] Recognize TableGen paste operator on
separate line
Formatting this piece of code made the program crash.
```
class TypedVecListRegOperand<RegisterClass Reg, int lanes, string eltsize>
: RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '"
# eltsize # "'>">;
```
The line starting with `#` was treated as a separate preprocessor
directive line. The code then dereferenced a null pointer when it tried
to continue parsing the first line, which did not end in a semicolon.
Both problems are now fixed.
---
clang/lib/Format/TokenAnnotator.cpp | 2 ++
clang/lib/Format/UnwrappedLineParser.cpp | 11 +++++++++--
clang/unittests/Format/FormatTestTableGen.cpp | 6 ++++++
clang/unittests/Format/TokenAnnotatorTest.cpp | 17 +++++++++++++++++
4 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index d87b3a6088bd8..278355aa58586 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -949,6 +949,8 @@ class AnnotatingParser {
HashTok->setType(TT_Unknown);
if (!parseTableGenValue(ParseNameMode))
return false;
+ if (!CurrentToken)
+ return true;
}
// In name mode, '{' is regarded as the end of the value.
// See TGParser::ParseValue in TGParser.cpp
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index f7712bea01c2c..aa0c372d5e15f 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -4853,9 +4853,16 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
PreviousWasComment = FormatTok->is(tok::comment);
while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
- (!Style.isVerilog() ||
- Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
FirstNonCommentOnLine) {
+ // In Verilog, the backtick is used for macro invocations. In TableGen,
+ // the single hash is used for the paste operator.
+ const FormatToken *Next = Tokens->peekNextToken();
+ assert(Next); // There is an EOF token at the end.
+ if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) ||
+ (Style.isTableGen() &&
+ !Next->isOneOf(tok::pp_define, tok::pp_ifdef, tok::pp_ifndef))) {
+ break;
+ }
distributeComments(Comments, FormatTok);
Comments.clear();
// If there is an unfinished unwrapped line, we flush the preprocessor
diff --git a/clang/unittests/Format/FormatTestTableGen.cpp b/clang/unittests/Format/FormatTestTableGen.cpp
index 92377c31f2e91..b78f79f20704f 100644
--- a/clang/unittests/Format/FormatTestTableGen.cpp
+++ b/clang/unittests/Format/FormatTestTableGen.cpp
@@ -218,6 +218,12 @@ TEST_F(FormatTestTableGen, PasteOperator) {
" string Z = [\"Traring\", \"Paste\", \"Traring\", \"Paste\",\n"
" \"Traring\", \"Paste\"]#;\n"
"}");
+ verifyFormat("def x#x {}", "def x\n"
+ "#x {}");
+ verifyFormat("def x#x {}", "def x\n"
+ "#\n"
+ "x {}");
+ verifyFormat("def x#x");
}
TEST_F(FormatTestTableGen, ClassDefinition) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index ac5e979aea071..fb8f5d30a669f 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2832,6 +2832,23 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
Tokens = Annotate("!cond");
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
+ // The paste operator should not be treated as a preprocessor directive even
+ // if it is on a separate line.
+ Tokens = Annotate("def x\n"
+ "#embed {}");
+ ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+ EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
+ EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown);
+ EXPECT_EQ(Tokens[1]->Next, Tokens[2]);
+ Tokens = Annotate("def x\n"
+ "#define x\n"
+ "#embed {}");
+ ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+ EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
+ EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown);
+ EXPECT_TOKEN(Tokens[5], tok::hash, TT_Unknown);
+ EXPECT_EQ(Tokens[1]->Next, Tokens[5]);
+
auto AnnotateValue = [this, &Style](StringRef Code) {
// Values are annotated only in specific context.
auto Result = annotate(("def X { let V = " + Code + "; }").str(), Style);
>From d3671a56acf741443dd2ba16fc46f488103abd36 Mon Sep 17 00:00:00 2001
From: sstwcw <su3e8a96kzlver at posteo.net>
Date: Fri, 4 Apr 2025 14:24:58 +0000
Subject: [PATCH 2/3] Add other directives
---
clang/lib/Format/UnwrappedLineParser.cpp | 3 ++-
clang/unittests/Format/TokenAnnotatorTest.cpp | 11 ++++++++---
2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index aa0c372d5e15f..d3cedd249214a 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -4860,7 +4860,8 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
assert(Next); // There is an EOF token at the end.
if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) ||
(Style.isTableGen() &&
- !Next->isOneOf(tok::pp_define, tok::pp_ifdef, tok::pp_ifndef))) {
+ !Next->isOneOf(tok::kw_else, tok::pp_define, tok::pp_ifdef,
+ tok::pp_ifndef, tok::pp_endif))) {
break;
}
distributeComments(Comments, FormatTok);
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index fb8f5d30a669f..dbe3e05e6fb72 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2838,16 +2838,21 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
"#embed {}");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
- EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown);
EXPECT_EQ(Tokens[1]->Next, Tokens[2]);
Tokens = Annotate("def x\n"
"#define x\n"
"#embed {}");
ASSERT_EQ(Tokens.size(), 10u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
- EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown);
- EXPECT_TOKEN(Tokens[5], tok::hash, TT_Unknown);
EXPECT_EQ(Tokens[1]->Next, Tokens[5]);
+ Tokens = Annotate("def x\n"
+ "#ifdef x\n"
+ "#else\n"
+ "#endif\n"
+ "#embed {}");
+ ASSERT_EQ(Tokens.size(), 14u) << Tokens;
+ EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
+ EXPECT_EQ(Tokens[1]->Next, Tokens[9]);
auto AnnotateValue = [this, &Style](StringRef Code) {
// Values are annotated only in specific context.
>From 45c25b7c9bf8c95c9447dbae08562e99bd5642ee Mon Sep 17 00:00:00 2001
From: sstwcw <su3e8a96kzlver at posteo.net>
Date: Fri, 4 Apr 2025 14:27:29 +0000
Subject: [PATCH 3/3] Comments
---
clang/lib/Format/UnwrappedLineParser.cpp | 3 +--
clang/unittests/Format/FormatTestTableGen.cpp | 1 +
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index d3cedd249214a..cab21fc4970fe 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -4856,8 +4856,7 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
FirstNonCommentOnLine) {
// In Verilog, the backtick is used for macro invocations. In TableGen,
// the single hash is used for the paste operator.
- const FormatToken *Next = Tokens->peekNextToken();
- assert(Next); // There is an EOF token at the end.
+ const auto *Next = Tokens->peekNextToken();
if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) ||
(Style.isTableGen() &&
!Next->isOneOf(tok::kw_else, tok::pp_define, tok::pp_ifdef,
diff --git a/clang/unittests/Format/FormatTestTableGen.cpp b/clang/unittests/Format/FormatTestTableGen.cpp
index b78f79f20704f..1c3d187de393c 100644
--- a/clang/unittests/Format/FormatTestTableGen.cpp
+++ b/clang/unittests/Format/FormatTestTableGen.cpp
@@ -218,6 +218,7 @@ TEST_F(FormatTestTableGen, PasteOperator) {
" string Z = [\"Traring\", \"Paste\", \"Traring\", \"Paste\",\n"
" \"Traring\", \"Paste\"]#;\n"
"}");
+
verifyFormat("def x#x {}", "def x\n"
"#x {}");
verifyFormat("def x#x {}", "def x\n"
More information about the cfe-commits
mailing list