[clang] Added keyword #undef to llvm-tblgen and fixed a small bug for llvm-tb… (PR #69093)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Oct 15 05:17:28 PDT 2023
https://github.com/whousemyname updated https://github.com/llvm/llvm-project/pull/69093
From aa8c80fffcbb3844a07347e7be14756b2d44d3a3 Mon Sep 17 00:00:00 2001
From: angryZ <lazytortoisezzzz at gmail.com>
Date: Sun, 15 Oct 2023 14:53:03 +0800
Subject: [PATCH] Added keyword #undef to llvm-tblgen and fixed a small bug for
llvm-tblgen
---
llvm/lib/TableGen/TGLexer.cpp | 242 +++++++++++++++++++++-------------
llvm/lib/TableGen/TGLexer.h | 11 +-
2 files changed, 156 insertions(+), 97 deletions(-)
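For context, a minimal sketch of how the new directive is meant to be used in a .td file (the macro name MY_FEATURE and the record Foo are made up for illustration):

  #define MY_FEATURE
  #ifdef MY_FEATURE
  def Foo;
  #endif // MY_FEATURE
  #undef MY_FEATURE

After the #undef, MY_FEATURE is removed from the lexer's set of defined macros, so a later #ifdef MY_FEATURE region would be skipped; a #undef of a name that was never defined is reported as an error.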
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index d5140e91fce9e94..4069c425c317e64 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -12,6 +12,7 @@
#include "TGLexer.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" // for strtoull()/strtoll() define
@@ -35,13 +36,9 @@ namespace {
struct {
tgtok::TokKind Kind;
const char *Word;
-} PreprocessorDirs[] = {
- { tgtok::Ifdef, "ifdef" },
- { tgtok::Ifndef, "ifndef" },
- { tgtok::Else, "else" },
- { tgtok::Endif, "endif" },
- { tgtok::Define, "define" }
-};
+} PreprocessorDirs[] = {{tgtok::Ifdef, "ifdef"}, {tgtok::Ifndef, "ifndef"},
+ {tgtok::Else, "else"}, {tgtok::Endif, "endif"},
+ {tgtok::Define, "define"}, {tgtok::Undef, "undef"}};
} // end anonymous namespace
TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
@@ -59,9 +56,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
DefinedMacros.insert(MacroName);
}
-SMLoc TGLexer::getLoc() const {
- return SMLoc::getFromPointer(TokStart);
-}
+SMLoc TGLexer::getLoc() const { return SMLoc::getFromPointer(TokStart); }
SMRange TGLexer::getLocRange() const {
return {getLoc(), SMLoc::getFromPointer(CurPtr)};
@@ -128,16 +123,13 @@ int TGLexer::getNextChar() {
// Handle the newline character by ignoring it and incrementing the line
// count. However, be careful about 'dos style' files with \n\r in them.
// Only treat a \n\r or \r\n as a single line.
- if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
- *CurPtr != CurChar)
- ++CurPtr; // Eat the two char newline sequence.
+ if ((*CurPtr == '\n' || (*CurPtr == '\r')) && *CurPtr != CurChar)
+ ++CurPtr; // Eat the two char newline sequence.
return '\n';
}
}
-int TGLexer::peekNextChar(int Index) const {
- return *(CurPtr + Index);
-}
+int TGLexer::peekNextChar(int Index) const { return *(CurPtr + Index); }
tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
TokStart = CurPtr;
@@ -164,18 +156,30 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
// Return EOF denoting the end of lexing.
return tgtok::Eof;
- case ':': return tgtok::colon;
- case ';': return tgtok::semi;
- case ',': return tgtok::comma;
- case '<': return tgtok::less;
- case '>': return tgtok::greater;
- case ']': return tgtok::r_square;
- case '{': return tgtok::l_brace;
- case '}': return tgtok::r_brace;
- case '(': return tgtok::l_paren;
- case ')': return tgtok::r_paren;
- case '=': return tgtok::equal;
- case '?': return tgtok::question;
+ case ':':
+ return tgtok::colon;
+ case ';':
+ return tgtok::semi;
+ case ',':
+ return tgtok::comma;
+ case '<':
+ return tgtok::less;
+ case '>':
+ return tgtok::greater;
+ case ']':
+ return tgtok::r_square;
+ case '{':
+ return tgtok::l_brace;
+ case '}':
+ return tgtok::r_brace;
+ case '(':
+ return tgtok::l_paren;
+ case ')':
+ return tgtok::r_paren;
+ case '=':
+ return tgtok::equal;
+ case '?':
+ return tgtok::question;
case '#':
if (FileOrLineStart) {
tgtok::TokKind Kind = prepIsDirective();
@@ -220,9 +224,18 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
} else // Otherwise, this is an error.
return ReturnError(TokStart, "Unexpected character");
return LexToken(FileOrLineStart);
- case '-': case '+':
- case '0': case '1': case '2': case '3': case '4': case '5': case '6':
- case '7': case '8': case '9': {
+ case '-':
+ case '+':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
int NextChar = 0;
if (isdigit(CurChar)) {
// Allow identifiers to start with a number if it is followed by
@@ -240,14 +253,31 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
switch (NextNextChar) {
default:
break;
- case '0': case '1':
+ case '0':
+ case '1':
if (NextChar == 'b')
return LexNumber();
[[fallthrough]];
- case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
if (NextChar == 'x')
return LexNumber();
break;
@@ -260,10 +290,14 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
return LexNumber();
}
- case '"': return LexString();
- case '$': return LexVarName();
- case '[': return LexBracket();
- case '!': return LexExclaim();
+ case '"':
+ return LexString();
+ case '$':
+ return LexVarName();
+ case '[':
+ return LexBracket();
+ case '!':
+ return LexExclaim();
}
}
@@ -289,7 +323,9 @@ tgtok::TokKind TGLexer::LexString() {
++CurPtr;
switch (*CurPtr) {
- case '\\': case '\'': case '"':
+ case '\\':
+ case '\'':
+ case '"':
// These turn into their literal character.
CurStrVal += *CurPtr++;
break;
@@ -343,45 +379,46 @@ tgtok::TokKind TGLexer::LexIdentifier() {
++CurPtr;
// Check to see if this identifier is a reserved keyword.
- StringRef Str(IdentStart, CurPtr-IdentStart);
+ StringRef Str(IdentStart, CurPtr - IdentStart);
tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
- .Case("int", tgtok::Int)
- .Case("bit", tgtok::Bit)
- .Case("bits", tgtok::Bits)
- .Case("string", tgtok::String)
- .Case("list", tgtok::List)
- .Case("code", tgtok::Code)
- .Case("dag", tgtok::Dag)
- .Case("class", tgtok::Class)
- .Case("def", tgtok::Def)
- .Case("true", tgtok::TrueVal)
- .Case("false", tgtok::FalseVal)
- .Case("foreach", tgtok::Foreach)
- .Case("defm", tgtok::Defm)
- .Case("defset", tgtok::Defset)
- .Case("multiclass", tgtok::MultiClass)
- .Case("field", tgtok::Field)
- .Case("let", tgtok::Let)
- .Case("in", tgtok::In)
- .Case("defvar", tgtok::Defvar)
- .Case("include", tgtok::Include)
- .Case("if", tgtok::If)
- .Case("then", tgtok::Then)
- .Case("else", tgtok::ElseKW)
- .Case("assert", tgtok::Assert)
- .Default(tgtok::Id);
+ .Case("int", tgtok::Int)
+ .Case("bit", tgtok::Bit)
+ .Case("bits", tgtok::Bits)
+ .Case("string", tgtok::String)
+ .Case("list", tgtok::List)
+ .Case("code", tgtok::Code)
+ .Case("dag", tgtok::Dag)
+ .Case("class", tgtok::Class)
+ .Case("def", tgtok::Def)
+ .Case("true", tgtok::TrueVal)
+ .Case("false", tgtok::FalseVal)
+ .Case("foreach", tgtok::Foreach)
+ .Case("defm", tgtok::Defm)
+ .Case("defset", tgtok::Defset)
+ .Case("multiclass", tgtok::MultiClass)
+ .Case("field", tgtok::Field)
+ .Case("let", tgtok::Let)
+ .Case("in", tgtok::In)
+ .Case("defvar", tgtok::Defvar)
+ .Case("include", tgtok::Include)
+ .Case("if", tgtok::If)
+ .Case("then", tgtok::Then)
+ .Case("else", tgtok::ElseKW)
+ .Case("assert", tgtok::Assert)
+ .Default(tgtok::Id);
// A couple of tokens require special processing.
switch (Kind) {
- case tgtok::Include:
- if (LexInclude()) return tgtok::Error;
- return Lex();
- case tgtok::Id:
- CurStrVal.assign(Str.begin(), Str.end());
- break;
- default:
- break;
+ case tgtok::Include:
+ if (LexInclude())
+ return tgtok::Error;
+ return Lex();
+ case tgtok::Id:
+ CurStrVal.assign(Str.begin(), Str.end());
+ break;
+ default:
+ break;
}
return Kind;
@@ -392,7 +429,8 @@ tgtok::TokKind TGLexer::LexIdentifier() {
bool TGLexer::LexInclude() {
// The token after the include must be a string.
tgtok::TokKind Tok = LexToken();
- if (Tok == tgtok::Error) return true;
+ if (Tok == tgtok::Error)
+ return true;
if (Tok != tgtok::StrVal) {
PrintError(getLoc(), "Expected filename after include");
return true;
@@ -422,7 +460,7 @@ bool TGLexer::LexInclude() {
/// SkipBCPLComment - Skip over the comment by finding the next CR or LF.
/// Or we may end up at the end of the buffer.
void TGLexer::SkipBCPLComment() {
- ++CurPtr; // skip the second slash.
+ ++CurPtr; // skip the second slash.
auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
}
@@ -430,7 +468,7 @@ void TGLexer::SkipBCPLComment() {
/// SkipCComment - This skips C-style /**/ comments. The only difference from C
/// is that we allow nesting.
bool TGLexer::SkipCComment() {
- ++CurPtr; // skip the star.
+ ++CurPtr; // skip the star.
unsigned CommentDepth = 1;
while (true) {
@@ -441,15 +479,17 @@ bool TGLexer::SkipCComment() {
return true;
case '*':
// End of the comment?
- if (CurPtr[0] != '/') break;
+ if (CurPtr[0] != '/')
+ break;
- ++CurPtr; // End the */.
+ ++CurPtr; // End the */.
if (--CommentDepth == 0)
return false;
break;
case '/':
// Start of a nested comment?
- if (CurPtr[0] != '*') break;
+ if (CurPtr[0] != '*')
+ break;
++CurPtr;
++CommentDepth;
break;
@@ -529,14 +569,17 @@ tgtok::TokKind TGLexer::LexBracket() {
const char *CodeStart = CurPtr;
while (true) {
int Char = getNextChar();
- if (Char == EOF) break;
+ if (Char == EOF)
+ break;
- if (Char != '}') continue;
+ if (Char != '}')
+ continue;
Char = getNextChar();
- if (Char == EOF) break;
+ if (Char == EOF)
+ break;
if (Char == ']') {
- CurStrVal.assign(CodeStart, CurPtr-2);
+ CurStrVal.assign(CodeStart, CurPtr - 2);
return tgtok::CodeFragment;
}
}
@@ -608,7 +651,8 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("repr", tgtok::XRepr)
.Default(tgtok::Error);
- return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
+ return Kind != tgtok::Error ? Kind
+ : ReturnError(Start - 1, "Unknown operator");
}
bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
@@ -664,7 +708,7 @@ tgtok::TokKind TGLexer::prepIsDirective() const {
// It looks like TableGen does not support '\r' as the actual
// carriage return, e.g. getNextChar() treats a single '\r'
// as '\n'. So we do the same here.
- NextChar == '\r')
+ NextChar == '\r' || NextChar == '\0')
return Kind;
// Allow comments after some directives, e.g.:
@@ -708,8 +752,8 @@ bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
return false;
}
-tgtok::TokKind TGLexer::lexPreprocessor(
- tgtok::TokKind Kind, bool ReturnNextLiveToken) {
+tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
+ bool ReturnNextLiveToken) {
// We must be looking at a preprocessing directive. Eat it!
if (!prepEatPreprocessorDirective(Kind))
@@ -834,6 +878,24 @@ tgtok::TokKind TGLexer::lexPreprocessor(
return tgtok::Error;
}
+ return LexToken();
+ } else if (Kind == tgtok::Undef) {
+ StringRef MacroName = prepLexMacroName();
+ if (MacroName.empty())
+      return ReturnError(TokStart, "Expected macro name after #undef");
+
+ if (!DefinedMacros.erase(MacroName))
+      return ReturnError(TokStart, "Cannot #undef an undefined macro");
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr,
+ "Only comments are supported after #undef NAME");
+
+ if (!ReturnNextLiveToken) {
+ PrintFatalError("#undef must be ignored during the lines skipping");
+ return tgtok::Error;
+ }
+
return LexToken();
}
@@ -867,7 +929,7 @@ bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
// If we did not find a preprocessing directive or it is #define,
// then just skip to the next line. We do not have to do anything
// for #define in the line-skipping mode.
- if (Kind == tgtok::Error || Kind == tgtok::Define)
+ if (Kind == tgtok::Error || Kind == tgtok::Define || Kind == tgtok::Undef)
continue;
tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 4429c91b7c9cf76..cb7d3b63c90e5ac 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -72,6 +72,7 @@ enum TokKind {
Else,
Endif,
Define,
+ Undef,
// Reserved keywords. ('ElseKW' is named to distinguish it from the
// existing 'Else' that means the preprocessor #else.)
@@ -210,13 +211,9 @@ class TGLexer {
public:
TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
- tgtok::TokKind Lex() {
- return CurCode = LexToken(CurPtr == CurBuf.begin());
- }
+ tgtok::TokKind Lex() { return CurCode = LexToken(CurPtr == CurBuf.begin()); }
- const DependenciesSetTy &getDependencies() const {
- return Dependencies;
- }
+ const DependenciesSetTy &getDependencies() const { return Dependencies; }
tgtok::TokKind getCode() const { return CurCode; }
@@ -232,7 +229,7 @@ class TGLexer {
std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
assert(CurCode == tgtok::BinaryIntVal &&
"This token isn't a binary integer");
- return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
+ return std::make_pair(CurIntVal, (CurPtr - TokStart) - 2);
}
SMLoc getLoc() const;
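The second change (accepting '\0' after a directive word in prepIsDirective) appears, as far as I can tell from the diff, to cover directives that end exactly at the end of the buffer. A sketch of a file the old check would reject, assuming the file ends right after "#endif" with no trailing newline (SOME_MACRO and Foo are made-up names):

  #ifdef SOME_MACRO
  def Foo;
  #endif

Because the lexer's buffer is NUL-terminated, the character following the final "#endif" in such a file is '\0' rather than '\n' or '\r', so the directive was previously not recognized; the added check lets it through.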