[llvm] [TableGen] Detect invalid -D arguments and fail (PR #102813)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 14 10:57:42 PDT 2024


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/102813

>From f59f4e3375072eb15f3e30cc8a36cce3bdc2e62f Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Sun, 11 Aug 2024 06:05:56 -0700
Subject: [PATCH] [TableGen] Detect invalid -D arguments and fail.

- Detect invalid macro names specified on the command line and fail if
  one is found.
- Specifically, -DXYZ=1, for example, will now fail instead of being
  silently accepted.
---
 llvm/lib/TableGen/TGLexer.cpp                 | 44 +++++++++++++------
 .../invalid-macro-name-command-line.td        |  9 ++++
 2 files changed, 40 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/TableGen/invalid-macro-name-command-line.td

diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 0554f0cf578831..c1fae91e1fd9f2 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -44,6 +44,25 @@ constexpr PreprocessorDir PreprocessorDirs[] = {{tgtok::Ifdef, "ifdef"},
                                                 {tgtok::Endif, "endif"},
                                                 {tgtok::Define, "define"}};
 
+// Returns a pointer past the end of a valid macro name at the start of `Str`
+// (Str.begin() if there is none). Valid names match [a-zA-Z_][0-9a-zA-Z_]*.
+static const char *lexMacroName(StringRef Str) {
+  assert(!Str.empty());
+  // Take unsigned char: a negative argument to <cctype> functions is UB.
+  auto IsAlpha = [](unsigned char C) { return isalpha(C) != 0; };
+  auto IsAlnum = [](unsigned char C) { return isalnum(C) != 0; };
+
+  // Macro names start with [a-zA-Z_].
+  const char *Next = Str.begin();
+  if (*Next != '_' && !IsAlpha(*Next))
+    return Next;
+  // Eat the first character, then match the rest of the regex: [0-9a-zA-Z_]*
+  const char *End = Str.end();
+  ++Next;
+  while (Next != End && (IsAlnum(*Next) || *Next == '_'))
+    ++Next;
+  return Next;
+}
+
 TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
   CurBuffer = SrcMgr.getMainFileID();
   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
@@ -54,9 +73,16 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
   PrepIncludeStack.push_back(
       std::make_unique<std::vector<PreprocessorControlDesc>>());
 
-  // Put all macros defined in the command line into the DefinedMacros set.
-  for (const std::string &MacroName : Macros)
+  // Add all macros defined on the command line to the DefinedMacros set.
+  // Check invalid macro names and print fatal error if we find one.
+  for (StringRef MacroName : Macros) {
+    const char *End = lexMacroName(MacroName);
+    if (End != MacroName.end())
+      PrintFatalError("Invalid macro name `" + MacroName +
+                      "` specified on command line");
+
     DefinedMacros.insert(MacroName);
+  }
 }
 
 SMLoc TGLexer::getLoc() const {
@@ -699,9 +725,8 @@ bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
   return false;
 }
 
-tgtok::TokKind TGLexer::lexPreprocessor(
-    tgtok::TokKind Kind, bool ReturnNextLiveToken) {
-
+tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
+                                        bool ReturnNextLiveToken) {
   // We must be looking at a preprocessing directive.  Eat it!
   if (!prepEatPreprocessorDirective(Kind))
     PrintFatalError("lexPreprocessor() called for unknown "
@@ -901,14 +926,7 @@ StringRef TGLexer::prepLexMacroName() {
     ++CurPtr;
 
   TokStart = CurPtr;
-  // Macro names start with [a-zA-Z_].
-  if (*CurPtr != '_' && !isalpha(*CurPtr))
-    return "";
-
-  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
-  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
-    ++CurPtr;
-
+  CurPtr = lexMacroName(StringRef(CurPtr, CurBuf.end() - CurPtr));
   return StringRef(TokStart, CurPtr - TokStart);
 }
 
diff --git a/llvm/test/TableGen/invalid-macro-name-command-line.td b/llvm/test/TableGen/invalid-macro-name-command-line.td
new file mode 100644
index 00000000000000..0d2307997ebe54
--- /dev/null
+++ b/llvm/test/TableGen/invalid-macro-name-command-line.td
@@ -0,0 +1,9 @@
+// RUN: not llvm-tblgen %s -DMACRO=1 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-1
+// RUN: not llvm-tblgen %s -D0MAC 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-2
+// RUN: not llvm-tblgen %s -D_MAC# 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-3
+// RUN: not llvm-tblgen %s -D 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-4
+
+// CHECK-TEST-1: error: Invalid macro name `MACRO=1` specified on command line
+// CHECK-TEST-2: error: Invalid macro name `0MAC` specified on command line
+// CHECK-TEST-3: error: Invalid macro name `_MAC#` specified on command line
+// CHECK-TEST-4: for the -D option: requires a value!



More information about the llvm-commits mailing list