[clang] [clang-format] Stop crashing on slightly off Verilog module headers (PR #116000)

via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 12 22:26:04 PST 2024


https://github.com/sstwcw updated https://github.com/llvm/llvm-project/pull/116000

>From 1e2283c1fc32a0590e98cec061edaec5e8ee67b1 Mon Sep 17 00:00:00 2001
From: sstwcw <su3e8a96kzlver at posteo.net>
Date: Wed, 13 Nov 2024 05:37:04 +0000
Subject: [PATCH 1/2] [clang-format] Stop crashing on slightly off Verilog
 module headers

This piece of code made the program crash.

```Verilog
function pkg::t get
    (int t = 2,
     int f = 2);
```

The way the code is supposed to be parsed is that UnwrappedLineParser
should identify the function header, and then TokenAnnotator should
recognize the result.  But the code in UnwrappedLineParser would
mistakenly not recognize it due to the `::`.  Then TokenAnnotator would
recognize the comma both as TT_VerilogInstancePortComma and
TT_VerilogTypeComma.  The code for annotating the instance port comma
used `setFinalizedType`.  The program would crash when it tried to set
it to another type.

The code in UnwrappedLineParser now recognizes the `::` token.

The are other cases in which TokenAnnotator would recognize the comma as
both of those types, for example if the `function` keyword is removed.
The type is now set using `setType` instead so that the program does not
crash.  The developer no longer knows why he used `setFinalizedType`
back then.
---
 clang/lib/Format/TokenAnnotator.cpp           | 4 ++--
 clang/lib/Format/UnwrappedLineParser.cpp      | 3 ++-
 clang/unittests/Format/FormatTestVerilog.cpp  | 5 +++++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 8 ++++++++
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 269cbef2720792..bc5239209f3aab 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1554,7 +1554,7 @@ class AnnotatingParser {
         };
 
         if (IsInstancePort())
-          Tok->setFinalizedType(TT_VerilogInstancePortLParen);
+          Tok->setType(TT_VerilogInstancePortLParen);
       }
 
       if (!parseParens())
@@ -1730,7 +1730,7 @@ class AnnotatingParser {
         Tok->setType(TT_InheritanceComma);
         break;
       case Context::VerilogInstancePortList:
-        Tok->setFinalizedType(TT_VerilogInstancePortComma);
+        Tok->setType(TT_VerilogInstancePortComma);
         break;
       default:
         if (Style.isVerilog() && Contexts.size() == 1 &&
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 5f1dd38ef1eb3b..e3b96b19ba0318 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -4441,7 +4441,8 @@ unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
           Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
         parseSquare();
       } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
-                 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
+                 FormatTok->isOneOf(tok::coloncolon, Keywords.kw_automatic,
+                                    tok::kw_static)) {
         nextToken();
       } else {
         break;
diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp
index 49d276fc78d81b..2bc3887af5c1db 100644
--- a/clang/unittests/Format/FormatTestVerilog.cpp
+++ b/clang/unittests/Format/FormatTestVerilog.cpp
@@ -702,6 +702,11 @@ TEST_F(FormatTestVerilog, Hierarchy) {
                "  generate\n"
                "  endgenerate\n"
                "endfunction : x");
+  // Type names with '::' should be recognized.
+  verifyFormat("function automatic x::x x\n"
+               "    (input x);\n"
+               "endfunction : x");
+  verifyNoCrash("x x(x x, x x);");
 }
 
 TEST_F(FormatTestVerilog, Identifiers) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index bb8ee416ea2db5..e88b8ea4894657 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2598,6 +2598,14 @@ TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) {
   Tokens = Annotate("x = '{\"\"};");
   ASSERT_EQ(Tokens.size(), 8u) << Tokens;
   EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
+
+  // Module headers.
+  Tokens = Annotate("module x();\nendmodule");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_VerilogMultiLineListLParen);
+  Tokens = Annotate("function automatic x::x x();\nendmodule");
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_VerilogMultiLineListLParen);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {

>From 88547bced7572ae98092e4c0e41dcfe4faf2eb54 Mon Sep 17 00:00:00 2001
From: sstwcw <su3e8a96kzlver at posteo.net>
Date: Wed, 13 Nov 2024 06:25:27 +0000
Subject: [PATCH 2/2] Recognize backticks

---
 clang/lib/Format/UnwrappedLineParser.cpp      | 4 ++--
 clang/unittests/Format/FormatTestVerilog.cpp  | 7 +++++++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 6 ++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index e3b96b19ba0318..c182aaf0876d1b 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -4441,8 +4441,8 @@ unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
           Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
         parseSquare();
       } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
-                 FormatTok->isOneOf(tok::coloncolon, Keywords.kw_automatic,
-                                    tok::kw_static)) {
+                 FormatTok->isOneOf(tok::hash, tok::hashhash, tok::coloncolon,
+                                    Keywords.kw_automatic, tok::kw_static)) {
         nextToken();
       } else {
         break;
diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp
index 2bc3887af5c1db..e4a14ff754d1a2 100644
--- a/clang/unittests/Format/FormatTestVerilog.cpp
+++ b/clang/unittests/Format/FormatTestVerilog.cpp
@@ -706,6 +706,13 @@ TEST_F(FormatTestVerilog, Hierarchy) {
   verifyFormat("function automatic x::x x\n"
                "    (input x);\n"
                "endfunction : x");
+  // Names having to do macros should be recognized.
+  verifyFormat("function automatic x::x x``x\n"
+               "    (input x);\n"
+               "endfunction : x");
+  verifyFormat("function automatic x::x `x\n"
+               "    (input x);\n"
+               "endfunction : x");
   verifyNoCrash("x x(x x, x x);");
 }
 
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index e88b8ea4894657..e1ae1770e8ebe8 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2603,6 +2603,12 @@ TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) {
   Tokens = Annotate("module x();\nendmodule");
   ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_VerilogMultiLineListLParen);
+  Tokens = Annotate("function automatic `x x();\nendmodule");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::l_paren, TT_VerilogMultiLineListLParen);
+  Tokens = Annotate("function automatic x``x x();\nendmodule");
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_VerilogMultiLineListLParen);
   Tokens = Annotate("function automatic x::x x();\nendmodule");
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_VerilogMultiLineListLParen);



More information about the cfe-commits mailing list