[clang] 9ed2e68 - [clang-format] Parse Verilog if statements

via cfe-commits cfe-commits at lists.llvm.org
Sat Jun 25 19:21:43 PDT 2022


Author: sstwcw
Date: 2022-06-26T01:52:15Z
New Revision: 9ed2e68c9ae5cf346f938cc095e5448c1ff60f51

URL: https://github.com/llvm/llvm-project/commit/9ed2e68c9ae5cf346f938cc095e5448c1ff60f51
DIFF: https://github.com/llvm/llvm-project/commit/9ed2e68c9ae5cf346f938cc095e5448c1ff60f51.diff

LOG: [clang-format] Parse Verilog if statements

This patch mainly handles treating `begin` as a block opener.

While and for statements will be handled in another patch.

Reviewed By: HazardyKnusperkeks

Differential Revision: https://reviews.llvm.org/D123450

Added: 
    clang/unittests/Format/FormatTestVerilog.cpp

Modified: 
    clang/docs/ClangFormat.rst
    clang/include/clang/Format/Format.h
    clang/lib/Format/Format.cpp
    clang/lib/Format/FormatToken.h
    clang/lib/Format/UnwrappedLineParser.cpp
    clang/tools/clang-format/ClangFormat.cpp
    clang/unittests/Format/CMakeLists.txt
    clang/unittests/Format/FormatTestUtils.h

Removed: 
    


################################################################################
diff  --git a/clang/docs/ClangFormat.rst b/clang/docs/ClangFormat.rst
index 745c66efa9e0e..16b316cdf0667 100644
--- a/clang/docs/ClangFormat.rst
+++ b/clang/docs/ClangFormat.rst
@@ -43,6 +43,17 @@ to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code.
     --assume-filename=<string>     - Override filename used to determine the language.
                                      When reading from stdin, clang-format assumes this
                                      filename to determine the language.
+                                     Unrecognized filenames are treated as C++.
+                                     supported:
+                                       CSharp: .cs
+                                       Java: .java
+                                       JavaScript: .mjs .js .ts
+                                       Json: .json
+                                       Objective-C: .m .mm
+                                       Proto: .proto .protodevel
+                                       TableGen: .td
+                                       TextProto: .textpb .pb.txt .textproto .asciipb
+                                       Verilog: .sv .svh .v .vh
     --cursor=<uint>                - The position of the cursor when invoking
                                      clang-format from an editor integration
     --dry-run                      - If set, do not actually make the formatting changes

diff  --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 8249060dc7c04..f8a4b069b2e75 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -2589,12 +2589,17 @@ struct FormatStyle {
     LK_TableGen,
     /// Should be used for Protocol Buffer messages in text format
     /// (https://developers.google.com/protocol-buffers/).
-    LK_TextProto
+    LK_TextProto,
+    /// Should be used for Verilog and SystemVerilog.
+    /// https://standards.ieee.org/ieee/1800/6700/
+    /// https://sci-hub.st/10.1109/IEEESTD.2018.8299595
+    LK_Verilog
   };
   bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
   bool isCSharp() const { return Language == LK_CSharp; }
   bool isJson() const { return Language == LK_Json; }
   bool isJavaScript() const { return Language == LK_JavaScript; }
+  bool isVerilog() const { return Language == LK_Verilog; }
 
   /// Language, this format style is targeted at.
   /// \version 3.5
@@ -4285,6 +4290,8 @@ inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
     return "TableGen";
   case FormatStyle::LK_TextProto:
     return "TextProto";
+  case FormatStyle::LK_Verilog:
+    return "Verilog";
   default:
     return "Unknown";
   }

diff  --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index df1aa9da16e39..51526dc2a6817 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -3471,6 +3471,12 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
     return FormatStyle::LK_CSharp;
   if (FileName.endswith_insensitive(".json"))
     return FormatStyle::LK_Json;
+  if (FileName.endswith_insensitive(".sv") ||
+      FileName.endswith_insensitive(".svh") ||
+      FileName.endswith_insensitive(".v") ||
+      FileName.endswith_insensitive(".vh")) {
+    return FormatStyle::LK_Verilog;
+  }
   return FormatStyle::LK_Cpp;
 }
 

diff  --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 145ae39ddb62c..92fa0798f699e 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -979,6 +979,118 @@ struct AdditionalKeywords {
     kw_when = &IdentTable.get("when");
     kw_where = &IdentTable.get("where");
 
+    kw_always = &IdentTable.get("always");
+    kw_always_comb = &IdentTable.get("always_comb");
+    kw_always_ff = &IdentTable.get("always_ff");
+    kw_always_latch = &IdentTable.get("always_latch");
+    kw_assign = &IdentTable.get("assign");
+    kw_assume = &IdentTable.get("assume");
+    kw_automatic = &IdentTable.get("automatic");
+    kw_before = &IdentTable.get("before");
+    kw_begin = &IdentTable.get("begin");
+    kw_bins = &IdentTable.get("bins");
+    kw_binsof = &IdentTable.get("binsof");
+    kw_casex = &IdentTable.get("casex");
+    kw_casez = &IdentTable.get("casez");
+    kw_celldefine = &IdentTable.get("celldefine");
+    kw_checker = &IdentTable.get("checker");
+    kw_clocking = &IdentTable.get("clocking");
+    kw_constraint = &IdentTable.get("constraint");
+    kw_cover = &IdentTable.get("cover");
+    kw_covergroup = &IdentTable.get("covergroup");
+    kw_coverpoint = &IdentTable.get("coverpoint");
+    kw_disable = &IdentTable.get("disable");
+    kw_dist = &IdentTable.get("dist");
+    kw_end = &IdentTable.get("end");
+    kw_endcase = &IdentTable.get("endcase");
+    kw_endchecker = &IdentTable.get("endchecker");
+    kw_endclass = &IdentTable.get("endclass");
+    kw_endclocking = &IdentTable.get("endclocking");
+    kw_endfunction = &IdentTable.get("endfunction");
+    kw_endgenerate = &IdentTable.get("endgenerate");
+    kw_endgroup = &IdentTable.get("endgroup");
+    kw_endinterface = &IdentTable.get("endinterface");
+    kw_endmodule = &IdentTable.get("endmodule");
+    kw_endpackage = &IdentTable.get("endpackage");
+    kw_endprimitive = &IdentTable.get("endprimitive");
+    kw_endprogram = &IdentTable.get("endprogram");
+    kw_endproperty = &IdentTable.get("endproperty");
+    kw_endsequence = &IdentTable.get("endsequence");
+    kw_endspecify = &IdentTable.get("endspecify");
+    kw_endtable = &IdentTable.get("endtable");
+    kw_endtask = &IdentTable.get("endtask");
+    kw_forever = &IdentTable.get("forever");
+    kw_fork = &IdentTable.get("fork");
+    kw_generate = &IdentTable.get("generate");
+    kw_highz0 = &IdentTable.get("highz0");
+    kw_highz1 = &IdentTable.get("highz1");
+    kw_iff = &IdentTable.get("iff");
+    kw_ifnone = &IdentTable.get("ifnone");
+    kw_ignore_bins = &IdentTable.get("ignore_bins");
+    kw_illegal_bins = &IdentTable.get("illegal_bins");
+    kw_initial = &IdentTable.get("initial");
+    kw_inout = &IdentTable.get("inout");
+    kw_input = &IdentTable.get("input");
+    kw_inside = &IdentTable.get("inside");
+    kw_interconnect = &IdentTable.get("interconnect");
+    kw_intersect = &IdentTable.get("intersect");
+    kw_join = &IdentTable.get("join");
+    kw_join_any = &IdentTable.get("join_any");
+    kw_join_none = &IdentTable.get("join_none");
+    kw_large = &IdentTable.get("large");
+    kw_local = &IdentTable.get("local");
+    kw_localparam = &IdentTable.get("localparam");
+    kw_macromodule = &IdentTable.get("macromodule");
+    kw_matches = &IdentTable.get("matches");
+    kw_medium = &IdentTable.get("medium");
+    kw_output = &IdentTable.get("output");
+    kw_packed = &IdentTable.get("packed");
+    kw_parameter = &IdentTable.get("parameter");
+    kw_primitive = &IdentTable.get("primitive");
+    kw_priority = &IdentTable.get("priority");
+    kw_program = &IdentTable.get("program");
+    kw_property = &IdentTable.get("property");
+    kw_pull0 = &IdentTable.get("pull0");
+    kw_pull1 = &IdentTable.get("pull1");
+    kw_pure = &IdentTable.get("pure");
+    kw_rand = &IdentTable.get("rand");
+    kw_randc = &IdentTable.get("randc");
+    kw_randcase = &IdentTable.get("randcase");
+    kw_randsequence = &IdentTable.get("randsequence");
+    kw_repeat = &IdentTable.get("repeat");
+    kw_sample = &IdentTable.get("sample");
+    kw_scalared = &IdentTable.get("scalared");
+    kw_sequence = &IdentTable.get("sequence");
+    kw_small = &IdentTable.get("small");
+    kw_soft = &IdentTable.get("soft");
+    kw_solve = &IdentTable.get("solve");
+    kw_specify = &IdentTable.get("specify");
+    kw_specparam = &IdentTable.get("specparam");
+    kw_strong0 = &IdentTable.get("strong0");
+    kw_strong1 = &IdentTable.get("strong1");
+    kw_supply0 = &IdentTable.get("supply0");
+    kw_supply1 = &IdentTable.get("supply1");
+    kw_table = &IdentTable.get("table");
+    kw_tagged = &IdentTable.get("tagged");
+    kw_task = &IdentTable.get("task");
+    kw_tri = &IdentTable.get("tri");
+    kw_tri0 = &IdentTable.get("tri0");
+    kw_tri1 = &IdentTable.get("tri1");
+    kw_triand = &IdentTable.get("triand");
+    kw_trior = &IdentTable.get("trior");
+    kw_trireg = &IdentTable.get("trireg");
+    kw_unique = &IdentTable.get("unique");
+    kw_unique0 = &IdentTable.get("unique0");
+    kw_uwire = &IdentTable.get("uwire");
+    kw_vectored = &IdentTable.get("vectored");
+    kw_wand = &IdentTable.get("wand");
+    kw_weak0 = &IdentTable.get("weak0");
+    kw_weak1 = &IdentTable.get("weak1");
+    kw_wildcard = &IdentTable.get("wildcard");
+    kw_wire = &IdentTable.get("wire");
+    kw_with = &IdentTable.get("with");
+    kw_wor = &IdentTable.get("wor");
+
     // Keep this at the end of the constructor to make sure everything here
     // is
     // already initialized.
@@ -1002,6 +1114,42 @@ struct AdditionalKeywords {
          kw_set, kw_type, kw_typeof, kw_var, kw_yield,
          // Keywords from the Java section.
          kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
+
+    // Some keywords are not included here because they don't need special
+    // treatment like `showcancelled` or they should be treated as identifiers
+    // like `int` and `logic`.
+    VerilogExtraKeywords = std::unordered_set<IdentifierInfo *>(
+        {kw_always,       kw_always_comb,  kw_always_ff,    kw_always_latch,
+         kw_assert,       kw_assign,       kw_assume,       kw_automatic,
+         kw_before,       kw_begin,        kw_bins,         kw_binsof,
+         kw_casex,        kw_casez,        kw_celldefine,   kw_checker,
+         kw_clocking,     kw_constraint,   kw_cover,        kw_covergroup,
+         kw_coverpoint,   kw_disable,      kw_dist,         kw_end,
+         kw_endcase,      kw_endchecker,   kw_endclass,     kw_endclocking,
+         kw_endfunction,  kw_endgenerate,  kw_endgroup,     kw_endinterface,
+         kw_endmodule,    kw_endpackage,   kw_endprimitive, kw_endprogram,
+         kw_endproperty,  kw_endsequence,  kw_endspecify,   kw_endtable,
+         kw_endtask,      kw_extends,      kw_final,        kw_foreach,
+         kw_forever,      kw_fork,         kw_function,     kw_generate,
+         kw_highz0,       kw_highz1,       kw_iff,          kw_ifnone,
+         kw_ignore_bins,  kw_illegal_bins, kw_implements,   kw_import,
+         kw_initial,      kw_inout,        kw_input,        kw_inside,
+         kw_interconnect, kw_interface,    kw_intersect,    kw_join,
+         kw_join_any,     kw_join_none,    kw_large,        kw_let,
+         kw_local,        kw_localparam,   kw_macromodule,  kw_matches,
+         kw_medium,       kw_output,       kw_package,      kw_packed,
+         kw_parameter,    kw_primitive,    kw_priority,     kw_program,
+         kw_property,     kw_pull0,        kw_pull1,        kw_pure,
+         kw_rand,         kw_randc,        kw_randcase,     kw_randsequence,
+         kw_ref,          kw_repeat,       kw_sample,       kw_scalared,
+         kw_sequence,     kw_small,        kw_soft,         kw_solve,
+         kw_specify,      kw_specparam,    kw_strong0,      kw_strong1,
+         kw_supply0,      kw_supply1,      kw_table,        kw_tagged,
+         kw_task,         kw_tri,          kw_tri0,         kw_tri1,
+         kw_triand,       kw_trior,        kw_trireg,       kw_unique,
+         kw_unique0,      kw_uwire,        kw_var,          kw_vectored,
+         kw_wand,         kw_weak0,        kw_weak1,        kw_wildcard,
+         kw_wire,         kw_with,         kw_wor});
   }
 
   // Context sensitive keywords.
@@ -1107,6 +1255,119 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_when;
   IdentifierInfo *kw_where;
 
+  // Verilog keywords
+  IdentifierInfo *kw_always;
+  IdentifierInfo *kw_always_comb;
+  IdentifierInfo *kw_always_ff;
+  IdentifierInfo *kw_always_latch;
+  IdentifierInfo *kw_assign;
+  IdentifierInfo *kw_assume;
+  IdentifierInfo *kw_automatic;
+  IdentifierInfo *kw_before;
+  IdentifierInfo *kw_begin;
+  IdentifierInfo *kw_bins;
+  IdentifierInfo *kw_binsof;
+  IdentifierInfo *kw_casex;
+  IdentifierInfo *kw_casez;
+  IdentifierInfo *kw_celldefine;
+  IdentifierInfo *kw_checker;
+  IdentifierInfo *kw_clocking;
+  IdentifierInfo *kw_constraint;
+  IdentifierInfo *kw_cover;
+  IdentifierInfo *kw_covergroup;
+  IdentifierInfo *kw_coverpoint;
+  IdentifierInfo *kw_disable;
+  IdentifierInfo *kw_dist;
+  IdentifierInfo *kw_end;
+  IdentifierInfo *kw_endcase;
+  IdentifierInfo *kw_endchecker;
+  IdentifierInfo *kw_endclass;
+  IdentifierInfo *kw_endclocking;
+  IdentifierInfo *kw_endfunction;
+  IdentifierInfo *kw_endgenerate;
+  IdentifierInfo *kw_endgroup;
+  IdentifierInfo *kw_endinterface;
+  IdentifierInfo *kw_endmodule;
+  IdentifierInfo *kw_endpackage;
+  IdentifierInfo *kw_endprimitive;
+  IdentifierInfo *kw_endprogram;
+  IdentifierInfo *kw_endproperty;
+  IdentifierInfo *kw_endsequence;
+  IdentifierInfo *kw_endspecify;
+  IdentifierInfo *kw_endtable;
+  IdentifierInfo *kw_endtask;
+  IdentifierInfo *kw_forever;
+  IdentifierInfo *kw_fork;
+  IdentifierInfo *kw_generate;
+  IdentifierInfo *kw_highz0;
+  IdentifierInfo *kw_highz1;
+  IdentifierInfo *kw_iff;
+  IdentifierInfo *kw_ifnone;
+  IdentifierInfo *kw_ignore_bins;
+  IdentifierInfo *kw_illegal_bins;
+  IdentifierInfo *kw_initial;
+  IdentifierInfo *kw_inout;
+  IdentifierInfo *kw_input;
+  IdentifierInfo *kw_inside;
+  IdentifierInfo *kw_interconnect;
+  IdentifierInfo *kw_intersect;
+  IdentifierInfo *kw_join;
+  IdentifierInfo *kw_join_any;
+  IdentifierInfo *kw_join_none;
+  IdentifierInfo *kw_large;
+  IdentifierInfo *kw_local;
+  IdentifierInfo *kw_localparam;
+  IdentifierInfo *kw_macromodule;
+  IdentifierInfo *kw_matches;
+  IdentifierInfo *kw_medium;
+  IdentifierInfo *kw_output;
+  IdentifierInfo *kw_packed;
+  IdentifierInfo *kw_parameter;
+  IdentifierInfo *kw_primitive;
+  IdentifierInfo *kw_priority;
+  IdentifierInfo *kw_program;
+  IdentifierInfo *kw_property;
+  IdentifierInfo *kw_pull0;
+  IdentifierInfo *kw_pull1;
+  IdentifierInfo *kw_pure;
+  IdentifierInfo *kw_rand;
+  IdentifierInfo *kw_randc;
+  IdentifierInfo *kw_randcase;
+  IdentifierInfo *kw_randsequence;
+  IdentifierInfo *kw_repeat;
+  IdentifierInfo *kw_sample;
+  IdentifierInfo *kw_scalared;
+  IdentifierInfo *kw_sequence;
+  IdentifierInfo *kw_small;
+  IdentifierInfo *kw_soft;
+  IdentifierInfo *kw_solve;
+  IdentifierInfo *kw_specify;
+  IdentifierInfo *kw_specparam;
+  IdentifierInfo *kw_strong0;
+  IdentifierInfo *kw_strong1;
+  IdentifierInfo *kw_supply0;
+  IdentifierInfo *kw_supply1;
+  IdentifierInfo *kw_table;
+  IdentifierInfo *kw_tagged;
+  IdentifierInfo *kw_task;
+  IdentifierInfo *kw_tri;
+  IdentifierInfo *kw_tri0;
+  IdentifierInfo *kw_tri1;
+  IdentifierInfo *kw_triand;
+  IdentifierInfo *kw_trior;
+  IdentifierInfo *kw_trireg;
+  IdentifierInfo *kw_unique;
+  IdentifierInfo *kw_unique0;
+  IdentifierInfo *kw_uwire;
+  IdentifierInfo *kw_vectored;
+  IdentifierInfo *kw_wand;
+  IdentifierInfo *kw_weak0;
+  IdentifierInfo *kw_weak1;
+  IdentifierInfo *kw_wildcard;
+  IdentifierInfo *kw_wire;
+  IdentifierInfo *kw_with;
+  IdentifierInfo *kw_wor;
+
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
   /// If \c AcceptIdentifierName is true, returns true not only for keywords,
@@ -1233,12 +1494,72 @@ struct AdditionalKeywords {
     }
   }
 
+  bool isVerilogIdentifier(const FormatToken &Tok) const {
+    switch (Tok.Tok.getKind()) {
+    case tok::kw_case:
+    case tok::kw_class:
+    case tok::kw_const:
+    case tok::kw_continue:
+    case tok::kw_default:
+    case tok::kw_do:
+    case tok::kw_extern:
+    case tok::kw_else:
+    case tok::kw_enum:
+    case tok::kw_for:
+    case tok::kw_if:
+    case tok::kw_restrict:
+    case tok::kw_signed:
+    case tok::kw_static:
+    case tok::kw_struct:
+    case tok::kw_typedef:
+    case tok::kw_union:
+    case tok::kw_unsigned:
+    case tok::kw_virtual:
+    case tok::kw_while:
+      return false;
+    case tok::identifier:
+      return VerilogExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
+             VerilogExtraKeywords.end();
+    default:
+      // getIdentifierInfo returns non-null for both identifiers and keywords.
+      return Tok.Tok.getIdentifierInfo() != nullptr;
+    }
+  }
+
+  /// Returns whether \p Tok is a Verilog keyword that opens a block.
+  bool isVerilogBegin(const FormatToken &Tok) const {
+    // `table` is not included since it needs to be treated specially.
+    return !Tok.endsSequence(kw_fork, kw_disable) &&
+           Tok.isOneOf(kw_begin, kw_fork, kw_generate, kw_specify);
+  }
+
+  /// Returns whether \p Tok is a Verilog keyword that closes a block.
+  bool isVerilogEnd(const FormatToken &Tok) const {
+    return !Tok.endsSequence(kw_join, kw_rand) &&
+           Tok.isOneOf(TT_MacroBlockEnd, kw_end, kw_endcase, kw_endclass,
+                       kw_endclocking, kw_endchecker, kw_endfunction,
+                       kw_endgenerate, kw_endgroup, kw_endinterface,
+                       kw_endmodule, kw_endpackage, kw_endprimitive,
+                       kw_endprogram, kw_endproperty, kw_endsequence,
+                       kw_endspecify, kw_endtable, kw_endtask, kw_join,
+                       kw_join_any, kw_join_none);
+  }
+
+  /// Whether the token begins a block.
+  bool isBlockBegin(const FormatToken &Tok, const FormatStyle &Style) const {
+    return Tok.is(TT_MacroBlockBegin) ||
+           (Style.isVerilog() ? isVerilogBegin(Tok) : Tok.is(tok::l_brace));
+  }
+
 private:
   /// The JavaScript keywords beyond the C++ keyword set.
   std::unordered_set<IdentifierInfo *> JsExtraKeywords;
 
   /// The C# keywords beyond the C++ keyword set
   std::unordered_set<IdentifierInfo *> CSharpExtraKeywords;
+
+  /// The Verilog keywords beyond the C++ keyword set.
+  std::unordered_set<IdentifierInfo *> VerilogExtraKeywords;
 };
 
 } // namespace format

diff  --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index c1bfa4d573a72..11e51da32ac12 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -829,7 +829,17 @@ FormatToken *UnwrappedLineParser::parseBlock(
     bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
     IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
     bool CanContainBracedList, TokenType NextLBracesType) {
-  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
+  auto HandleVerilogBlockLabel = [this]() {
+    // ":" name
+    if (Style.isVerilog() && FormatTok->is(tok::colon)) {
+      nextToken();
+      if (Keywords.isVerilogIdentifier(*FormatTok))
+        nextToken();
+    }
+  };
+
+  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
+          (Style.isVerilog() && Keywords.isVerilogBegin(*FormatTok))) &&
          "'{' or macro block token expected");
   FormatToken *Tok = FormatTok;
   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
@@ -846,6 +856,7 @@ FormatToken *UnwrappedLineParser::parseBlock(
 
   const unsigned InitialLevel = Line->Level;
   nextToken(/*LevelDifference=*/AddLevels);
+  HandleVerilogBlockLabel();
 
   // Bail out if there are too many levels. Otherwise, the stack might overflow.
   if (Line->Level > 300)
@@ -926,6 +937,7 @@ FormatToken *UnwrappedLineParser::parseBlock(
 
   // Munch the closing brace.
   nextToken(/*LevelDifference=*/-AddLevels);
+  HandleVerilogBlockLabel();
 
   if (MacroBlock && FormatTok->is(tok::l_paren))
     parseParens();
@@ -2577,7 +2589,7 @@ FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
   FormatToken *IfLeftBrace = nullptr;
   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
 
-  if (FormatTok->is(tok::l_brace)) {
+  if (Keywords.isBlockBegin(*FormatTok, Style)) {
     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
     IfLeftBrace = FormatTok;
     CompoundStatementIndenter Indenter(this, Style, Line->Level);
@@ -2610,7 +2622,7 @@ FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
     }
     nextToken();
     handleAttributes();
-    if (FormatTok->is(tok::l_brace)) {
+    if (Keywords.isBlockBegin(*FormatTok, Style)) {
       const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
       FormatTok->setFinalizedType(TT_ElseLBrace);
       ElseLeftBrace = FormatTok;
@@ -2877,7 +2889,7 @@ void UnwrappedLineParser::parseNew() {
 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
   keepAncestorBraces();
 
-  if (FormatTok->is(tok::l_brace)) {
+  if (Keywords.isBlockBegin(*FormatTok, Style)) {
     if (!KeepBraces)
       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
     FormatToken *LeftBrace = FormatTok;
@@ -4166,6 +4178,16 @@ void UnwrappedLineParser::nextToken(int LevelDifference) {
   else
     readTokenWithJavaScriptASI();
   FormatTok->Previous = Previous;
+  if (Style.isVerilog()) {
+    // Blocks in Verilog can have `begin` and `end` instead of braces.  For
+    // keywords like `begin`, we can't treat them the same as left braces
+    // because some contexts require one of them.  For example structs use
+    // braces and if blocks use keywords, and a left brace can occur in an if
+    // statement, but it is not a block.  For keywords like `end`, we simply
+    // treat them the same as right braces.
+    if (Keywords.isVerilogEnd(*FormatTok))
+      FormatTok->Tok.setKind(tok::r_brace);
+  }
 }
 
 void UnwrappedLineParser::distributeComments(

diff  --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index b85cb1220d6ba..07110a0db0916 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -79,7 +79,18 @@ static cl::opt<std::string> AssumeFileName(
     "assume-filename",
     cl::desc("Override filename used to determine the language.\n"
              "When reading from stdin, clang-format assumes this\n"
-             "filename to determine the language."),
+             "filename to determine the language.\n"
+             "Unrecognized filenames are treated as C++.\n"
+             "supported:\n"
+             "  CSharp: .cs\n"
+             "  Java: .java\n"
+             "  JavaScript: .mjs .js .ts\n"
+             "  Json: .json\n"
+             "  Objective-C: .m .mm\n"
+             "  Proto: .proto .protodevel\n"
+             "  TableGen: .td\n"
+             "  TextProto: .textpb .pb.txt .textproto .asciipb\n"
+             "  Verilog: .sv .svh .v .vh"),
     cl::init("<stdin>"), cl::cat(ClangFormatCategory));
 
 static cl::opt<bool> Inplace("i",

diff  --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index a4ece033d6073..9cc6c7a96af59 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -17,6 +17,7 @@ add_clang_unittest(FormatTests
   FormatTestSelective.cpp
   FormatTestTableGen.cpp
   FormatTestTextProto.cpp
+  FormatTestVerilog.cpp
   MacroExpanderTest.cpp
   NamespaceEndCommentsFixerTest.cpp
   QualifierFixerTest.cpp

diff  --git a/clang/unittests/Format/FormatTestUtils.h b/clang/unittests/Format/FormatTestUtils.h
index ace5a4519d20b..defe0738c28ce 100644
--- a/clang/unittests/Format/FormatTestUtils.h
+++ b/clang/unittests/Format/FormatTestUtils.h
@@ -19,7 +19,10 @@ namespace clang {
 namespace format {
 namespace test {
 
-inline std::string messUp(llvm::StringRef Code) {
+// When HandleHash is false, preprocessor directives starting with hash will not
+// be on separate lines.  This is needed because Verilog uses hash for other
+// purposes.
+inline std::string messUp(llvm::StringRef Code, bool HandleHash = true) {
   std::string MessedUp(Code.str());
   bool InComment = false;
   bool InPreprocessorDirective = false;
@@ -29,7 +32,7 @@ inline std::string messUp(llvm::StringRef Code) {
       if (JustReplacedNewline)
         MessedUp[i - 1] = '\n';
       InComment = true;
-    } else if (MessedUp[i] == '#' &&
+    } else if (HandleHash && MessedUp[i] == '#' &&
                (JustReplacedNewline || i == 0 || MessedUp[i - 1] == '\n')) {
       if (i != 0)
         MessedUp[i - 1] = '\n';

diff  --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp
new file mode 100644
index 0000000000000..49e55fff4e9c1
--- /dev/null
+++ b/clang/unittests/Format/FormatTestVerilog.cpp
@@ -0,0 +1,118 @@
+//===- unittest/Format/FormatTestVerilog.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatTestUtils.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Debug.h"
+#include "gtest/gtest.h"
+
+#define DEBUG_TYPE "format-test"
+
+namespace clang {
+namespace format {
+
+class FormatTestVerilog : public ::testing::Test {
+protected:
+  static std::string format(llvm::StringRef Code, unsigned Offset,
+                            unsigned Length, const FormatStyle &Style) {
+    LLVM_DEBUG(llvm::errs() << "---\n");
+    LLVM_DEBUG(llvm::errs() << Code << "\n\n");
+    std::vector<tooling::Range> Ranges(1, tooling::Range(Offset, Length));
+    tooling::Replacements Replaces = reformat(Style, Code, Ranges);
+    auto Result = applyAllReplacements(Code, Replaces);
+    EXPECT_TRUE(static_cast<bool>(Result));
+    LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
+    return *Result;
+  }
+
+  static std::string format(llvm::StringRef Code, const FormatStyle &Style) {
+    return format(Code, 0, Code.size(), Style);
+  }
+
+  static void verifyFormat(
+      llvm::StringRef Code,
+      const FormatStyle &Style = getLLVMStyle(FormatStyle::LK_Verilog)) {
+    EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable";
+    EXPECT_EQ(Code.str(),
+              format(test::messUp(Code, /*HandleHash=*/false), Style));
+  }
+};
+
+TEST_F(FormatTestVerilog, If) {
+  verifyFormat("if (x)\n"
+               "  x = x;");
+  verifyFormat("if (x)\n"
+               "  x = x;\n"
+               "x = x;");
+
+  // Test else
+  verifyFormat("if (x)\n"
+               "  x = x;\n"
+               "else if (x)\n"
+               "  x = x;\n"
+               "else\n"
+               "  x = x;");
+  verifyFormat("if (x) begin\n"
+               "  x = x;\n"
+               "end else if (x) begin\n"
+               "  x = x;\n"
+               "end else begin\n"
+               "  x = x;\n"
+               "end");
+  verifyFormat("if (x) begin : x\n"
+               "  x = x;\n"
+               "end : x else if (x) begin : x\n"
+               "  x = x;\n"
+               "end : x else begin : x\n"
+               "  x = x;\n"
+               "end : x");
+
+  // Test block keywords.
+  verifyFormat("if (x) begin\n"
+               "  x = x;\n"
+               "end");
+  verifyFormat("if (x) begin : x\n"
+               "  x = x;\n"
+               "end : x");
+  verifyFormat("if (x) begin\n"
+               "  x = x;\n"
+               "  x = x;\n"
+               "end");
+  verifyFormat("disable fork;\n"
+               "x = x;");
+  verifyFormat("rand join x x;\n"
+               "x = x;");
+  verifyFormat("if (x) fork\n"
+               "  x = x;\n"
+               "join");
+  verifyFormat("if (x) fork\n"
+               "  x = x;\n"
+               "join_any");
+  verifyFormat("if (x) fork\n"
+               "  x = x;\n"
+               "join_none");
+  verifyFormat("if (x) generate\n"
+               "  x = x;\n"
+               "endgenerate");
+  verifyFormat("if (x) generate : x\n"
+               "  x = x;\n"
+               "endgenerate : x");
+
+  // Test that concatenation braces don't get regarded as blocks.
+  verifyFormat("if (x)\n"
+               "  {x} = x;");
+  verifyFormat("if (x)\n"
+               "  x = {x};");
+  verifyFormat("if (x)\n"
+               "  x = {x};\n"
+               "else\n"
+               "  {x} = {x};");
+}
+
+} // namespace format
+} // end namespace clang


        


More information about the cfe-commits mailing list