[clang] Added keyword #undef to llvm-tblgen and fixed a small bug for llvm-tblgen (PR #69093)

Sun Oct 15 05:17:28 PDT 2023

https://github.com/whousemyname updated https://github.com/llvm/llvm-project/pull/69093

>From aa8c80fffcbb3844a07347e7be14756b2d44d3a3 Mon Sep 17 00:00:00 2001
From: angryZ <lazytortoisezzzz at gmail.com>
Date: Sun, 15 Oct 2023 14:53:03 +0800
Subject: [PATCH] Added keyword #undef to llvm-tblgen and fixed a small bug for
 llvm-tblgen

---
 llvm/lib/TableGen/TGLexer.cpp | 242 +++++++++++++++++++++-------------
 llvm/lib/TableGen/TGLexer.h   |  11 +-
 2 files changed, 156 insertions(+), 97 deletions(-)

diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index d5140e91fce9e94..4069c425c317e64 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -12,6 +12,7 @@
 
 #include "TGLexer.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Config/config.h" // for strtoull()/strtoll() define
@@ -35,13 +36,9 @@ namespace {
 struct {
   tgtok::TokKind Kind;
   const char *Word;
-} PreprocessorDirs[] = {
-  { tgtok::Ifdef, "ifdef" },
-  { tgtok::Ifndef, "ifndef" },
-  { tgtok::Else, "else" },
-  { tgtok::Endif, "endif" },
-  { tgtok::Define, "define" }
-};
+} PreprocessorDirs[] = {{tgtok::Ifdef, "ifdef"},   {tgtok::Ifndef, "ifndef"},
+                        {tgtok::Else, "else"},     {tgtok::Endif, "endif"},
+                        {tgtok::Define, "define"}, {tgtok::Undef, "undef"}};
 } // end anonymous namespace
 
 TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
@@ -59,9 +56,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
     DefinedMacros.insert(MacroName);
 }
 
-SMLoc TGLexer::getLoc() const {
-  return SMLoc::getFromPointer(TokStart);
-}
+SMLoc TGLexer::getLoc() const { return SMLoc::getFromPointer(TokStart); }
 
 SMRange TGLexer::getLocRange() const {
   return {getLoc(), SMLoc::getFromPointer(CurPtr)};
@@ -128,16 +123,13 @@ int TGLexer::getNextChar() {
     // Handle the newline character by ignoring it and incrementing the line
     // count.  However, be careful about 'dos style' files with \n\r in them.
     // Only treat a \n\r or \r\n as a single line.
-    if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
-        *CurPtr != CurChar)
-      ++CurPtr;  // Eat the two char newline sequence.
+    if ((*CurPtr == '\n' || (*CurPtr == '\r')) && *CurPtr != CurChar)
+      ++CurPtr; // Eat the two char newline sequence.
     return '\n';
   }
 }
 
-int TGLexer::peekNextChar(int Index) const {
-  return *(CurPtr + Index);
-}
+int TGLexer::peekNextChar(int Index) const { return *(CurPtr + Index); }
 
 tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
   TokStart = CurPtr;
@@ -164,18 +156,30 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
     // Return EOF denoting the end of lexing.
     return tgtok::Eof;
 
-  case ':': return tgtok::colon;
-  case ';': return tgtok::semi;
-  case ',': return tgtok::comma;
-  case '<': return tgtok::less;
-  case '>': return tgtok::greater;
-  case ']': return tgtok::r_square;
-  case '{': return tgtok::l_brace;
-  case '}': return tgtok::r_brace;
-  case '(': return tgtok::l_paren;
-  case ')': return tgtok::r_paren;
-  case '=': return tgtok::equal;
-  case '?': return tgtok::question;
+  case ':':
+    return tgtok::colon;
+  case ';':
+    return tgtok::semi;
+  case ',':
+    return tgtok::comma;
+  case '<':
+    return tgtok::less;
+  case '>':
+    return tgtok::greater;
+  case ']':
+    return tgtok::r_square;
+  case '{':
+    return tgtok::l_brace;
+  case '}':
+    return tgtok::r_brace;
+  case '(':
+    return tgtok::l_paren;
+  case ')':
+    return tgtok::r_paren;
+  case '=':
+    return tgtok::equal;
+  case '?':
+    return tgtok::question;
   case '#':
     if (FileOrLineStart) {
       tgtok::TokKind Kind = prepIsDirective();
@@ -220,9 +224,18 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
     } else // Otherwise, this is an error.
       return ReturnError(TokStart, "Unexpected character");
     return LexToken(FileOrLineStart);
-  case '-': case '+':
-  case '0': case '1': case '2': case '3': case '4': case '5': case '6':
-  case '7': case '8': case '9': {
+  case '-':
+  case '+':
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9': {
     int NextChar = 0;
     if (isdigit(CurChar)) {
       // Allow identifiers to start with a number if it is followed by
@@ -240,14 +253,31 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
         switch (NextNextChar) {
         default:
           break;
-        case '0': case '1':
+        case '0':
+        case '1':
           if (NextChar == 'b')
             return LexNumber();
           [[fallthrough]];
-        case '2': case '3': case '4': case '5':
-        case '6': case '7': case '8': case '9':
-        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+        case 'a':
+        case 'b':
+        case 'c':
+        case 'd':
+        case 'e':
+        case 'f':
+        case 'A':
+        case 'B':
+        case 'C':
+        case 'D':
+        case 'E':
+        case 'F':
           if (NextChar == 'x')
             return LexNumber();
           break;
@@ -260,10 +290,14 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
 
     return LexNumber();
   }
-  case '"': return LexString();
-  case '$': return LexVarName();
-  case '[': return LexBracket();
-  case '!': return LexExclaim();
+  case '"':
+    return LexString();
+  case '$':
+    return LexVarName();
+  case '[':
+    return LexBracket();
+  case '!':
+    return LexExclaim();
   }
 }
 
@@ -289,7 +323,9 @@ tgtok::TokKind TGLexer::LexString() {
     ++CurPtr;
 
     switch (*CurPtr) {
-    case '\\': case '\'': case '"':
+    case '\\':
+    case '\'':
+    case '"':
       // These turn into their literal character.
       CurStrVal += *CurPtr++;
       break;
@@ -343,45 +379,46 @@ tgtok::TokKind TGLexer::LexIdentifier() {
     ++CurPtr;
 
   // Check to see if this identifier is a reserved keyword.
-  StringRef Str(IdentStart, CurPtr-IdentStart);
+  StringRef Str(IdentStart, CurPtr - IdentStart);
 
   tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
-    .Case("int", tgtok::Int)
-    .Case("bit", tgtok::Bit)
-    .Case("bits", tgtok::Bits)
-    .Case("string", tgtok::String)
-    .Case("list", tgtok::List)
-    .Case("code", tgtok::Code)
-    .Case("dag", tgtok::Dag)
-    .Case("class", tgtok::Class)
-    .Case("def", tgtok::Def)
-    .Case("true", tgtok::TrueVal)
-    .Case("false", tgtok::FalseVal)
-    .Case("foreach", tgtok::Foreach)
-    .Case("defm", tgtok::Defm)
-    .Case("defset", tgtok::Defset)
-    .Case("multiclass", tgtok::MultiClass)
-    .Case("field", tgtok::Field)
-    .Case("let", tgtok::Let)
-    .Case("in", tgtok::In)
-    .Case("defvar", tgtok::Defvar)
-    .Case("include", tgtok::Include)
-    .Case("if", tgtok::If)
-    .Case("then", tgtok::Then)
-    .Case("else", tgtok::ElseKW)
-    .Case("assert", tgtok::Assert)
-    .Default(tgtok::Id);
+                            .Case("int", tgtok::Int)
+                            .Case("bit", tgtok::Bit)
+                            .Case("bits", tgtok::Bits)
+                            .Case("string", tgtok::String)
+                            .Case("list", tgtok::List)
+                            .Case("code", tgtok::Code)
+                            .Case("dag", tgtok::Dag)
+                            .Case("class", tgtok::Class)
+                            .Case("def", tgtok::Def)
+                            .Case("true", tgtok::TrueVal)
+                            .Case("false", tgtok::FalseVal)
+                            .Case("foreach", tgtok::Foreach)
+                            .Case("defm", tgtok::Defm)
+                            .Case("defset", tgtok::Defset)
+                            .Case("multiclass", tgtok::MultiClass)
+                            .Case("field", tgtok::Field)
+                            .Case("let", tgtok::Let)
+                            .Case("in", tgtok::In)
+                            .Case("defvar", tgtok::Defvar)
+                            .Case("include", tgtok::Include)
+                            .Case("if", tgtok::If)
+                            .Case("then", tgtok::Then)
+                            .Case("else", tgtok::ElseKW)
+                            .Case("assert", tgtok::Assert)
+                            .Default(tgtok::Id);
 
   // A couple of tokens require special processing.
   switch (Kind) {
-    case tgtok::Include:
-      if (LexInclude()) return tgtok::Error;
-      return Lex();
-    case tgtok::Id:
-      CurStrVal.assign(Str.begin(), Str.end());
-      break;
-    default:
-      break;
+  case tgtok::Include:
+    if (LexInclude())
+      return tgtok::Error;
+    return Lex();
+  case tgtok::Id:
+    CurStrVal.assign(Str.begin(), Str.end());
+    break;
+  default:
+    break;
   }
 
   return Kind;
@@ -392,7 +429,8 @@ tgtok::TokKind TGLexer::LexIdentifier() {
 bool TGLexer::LexInclude() {
   // The token after the include must be a string.
   tgtok::TokKind Tok = LexToken();
-  if (Tok == tgtok::Error) return true;
+  if (Tok == tgtok::Error)
+    return true;
   if (Tok != tgtok::StrVal) {
     PrintError(getLoc(), "Expected filename after include");
     return true;
@@ -422,7 +460,7 @@ bool TGLexer::LexInclude() {
 /// SkipBCPLComment - Skip over the comment by finding the next CR or LF.
 /// Or we may end up at the end of the buffer.
 void TGLexer::SkipBCPLComment() {
-  ++CurPtr;  // skip the second slash.
+  ++CurPtr; // skip the second slash.
   auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
   CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
 }
@@ -430,7 +468,7 @@ void TGLexer::SkipBCPLComment() {
 /// SkipCComment - This skips C-style /**/ comments.  The only difference from C
 /// is that we allow nesting.
 bool TGLexer::SkipCComment() {
-  ++CurPtr;  // skip the star.
+  ++CurPtr; // skip the star.
   unsigned CommentDepth = 1;
 
   while (true) {
@@ -441,15 +479,17 @@ bool TGLexer::SkipCComment() {
       return true;
     case '*':
       // End of the comment?
-      if (CurPtr[0] != '/') break;
+      if (CurPtr[0] != '/')
+        break;
 
-      ++CurPtr;   // End the */.
+      ++CurPtr; // End the */.
       if (--CommentDepth == 0)
         return false;
       break;
     case '/':
       // Start of a nested comment?
-      if (CurPtr[0] != '*') break;
+      if (CurPtr[0] != '*')
+        break;
       ++CurPtr;
       ++CommentDepth;
       break;
@@ -529,14 +569,17 @@ tgtok::TokKind TGLexer::LexBracket() {
   const char *CodeStart = CurPtr;
   while (true) {
     int Char = getNextChar();
-    if (Char == EOF) break;
+    if (Char == EOF)
+      break;
 
-    if (Char != '}') continue;
+    if (Char != '}')
+      continue;
 
     Char = getNextChar();
-    if (Char == EOF) break;
+    if (Char == EOF)
+      break;
     if (Char == ']') {
-      CurStrVal.assign(CodeStart, CurPtr-2);
+      CurStrVal.assign(CodeStart, CurPtr - 2);
       return tgtok::CodeFragment;
     }
   }
@@ -608,7 +651,8 @@ tgtok::TokKind TGLexer::LexExclaim() {
           .Case("repr", tgtok::XRepr)
           .Default(tgtok::Error);
 
-  return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
+  return Kind != tgtok::Error ? Kind
+                              : ReturnError(Start - 1, "Unknown operator");
 }
 
 bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
@@ -664,7 +708,7 @@ tgtok::TokKind TGLexer::prepIsDirective() const {
           // It looks like TableGen does not support '\r' as the actual
           // carriage return, e.g. getNextChar() treats a single '\r'
           // as '\n'.  So we do the same here.
-          NextChar == '\r')
+          NextChar == '\r' || NextChar == '\0')
         return Kind;
 
       // Allow comments after some directives, e.g.:
@@ -708,8 +752,8 @@ bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
   return false;
 }
 
-tgtok::TokKind TGLexer::lexPreprocessor(
-    tgtok::TokKind Kind, bool ReturnNextLiveToken) {
+tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
+                                        bool ReturnNextLiveToken) {
 
   // We must be looking at a preprocessing directive.  Eat it!
   if (!prepEatPreprocessorDirective(Kind))
@@ -834,6 +878,24 @@ tgtok::TokKind TGLexer::lexPreprocessor(
       return tgtok::Error;
     }
 
+    return LexToken();
+  } else if (Kind == tgtok::Undef) {
+    StringRef MacroName = prepLexMacroName();
+    if (MacroName.empty())
+      return ReturnError(TokStart, "Expected macro name after #undef");
+
+    if (!DefinedMacros.erase(MacroName))
+      return ReturnError(TokStart, "undefine(#undef) an undefined macro");
+
+    if (!prepSkipDirectiveEnd())
+      return ReturnError(CurPtr,
+                         "Only comments are supported after #undef NAME");
+
+    if (!ReturnNextLiveToken) {
+      PrintFatalError("#undef must be ignored during the lines skipping");
+      return tgtok::Error;
+    }
+
     return LexToken();
   }
 
@@ -867,7 +929,7 @@ bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
     // If we did not find a preprocessing directive or it is #define,
     // then just skip to the next line.  We do not have to do anything
     // for #define in the line-skipping mode.
-    if (Kind == tgtok::Error || Kind == tgtok::Define)
+    if (Kind == tgtok::Error || Kind == tgtok::Define || Kind == tgtok::Undef)
       continue;
 
     tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 4429c91b7c9cf76..cb7d3b63c90e5ac 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -72,6 +72,7 @@ enum TokKind {
   Else,
   Endif,
   Define,
+  Undef,
 
   // Reserved keywords. ('ElseKW' is named to distinguish it from the
   // existing 'Else' that means the preprocessor #else.)
@@ -210,13 +211,9 @@ class TGLexer {
 public:
   TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
 
-  tgtok::TokKind Lex() {
-    return CurCode = LexToken(CurPtr == CurBuf.begin());
-  }
+  tgtok::TokKind Lex() { return CurCode = LexToken(CurPtr == CurBuf.begin()); }
 
-  const DependenciesSetTy &getDependencies() const {
-    return Dependencies;
-  }
+  const DependenciesSetTy &getDependencies() const { return Dependencies; }
 
   tgtok::TokKind getCode() const { return CurCode; }
 
@@ -232,7 +229,7 @@ class TGLexer {
   std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
     assert(CurCode == tgtok::BinaryIntVal &&
            "This token isn't a binary integer");
-    return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
+    return std::make_pair(CurIntVal, (CurPtr - TokStart) - 2);
   }
 
   SMLoc getLoc() const;