[llvm] 376e3b6 - [TableGen] Add `!match` operator to do regex matching (#130759)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 12 21:13:12 PDT 2025


Author: Pengcheng Wang
Date: 2025-03-13T12:13:09+08:00
New Revision: 376e3b62cd36cabf4235e085cd13df05c2bd754b

URL: https://github.com/llvm/llvm-project/commit/376e3b62cd36cabf4235e085cd13df05c2bd754b
DIFF: https://github.com/llvm/llvm-project/commit/376e3b62cd36cabf4235e085cd13df05c2bd754b.diff

LOG: [TableGen] Add `!match` operator to do regex matching (#130759)

The grammar is `!match(str, regex)` and this operator produces 1
if `str` matches the regular expression `regex`, and 0 otherwise.

The format of `regex` is ERE (Extended POSIX Regular Expressions).

Added: 
    llvm/test/TableGen/match.td

Modified: 
    llvm/docs/TableGen/ProgRef.rst
    llvm/include/llvm/TableGen/Record.h
    llvm/lib/TableGen/Record.cpp
    llvm/lib/TableGen/TGLexer.cpp
    llvm/lib/TableGen/TGLexer.h
    llvm/lib/TableGen/TGParser.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index edb97109c9289..0983c6283f7e2 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -225,11 +225,11 @@ TableGen provides "bang operators" that have a wide variety of uses:
                : !getdagname  !getdagop    !gt          !head        !if
                : !initialized !interleave  !isa         !le          !listconcat
                : !listflatten !listremove  !listsplat   !logtwo      !lt
-               : !mul         !ne          !not         !or          !range
-               : !repr        !setdagarg   !setdagname  !setdagop    !shl
-               : !size        !sra         !srl         !strconcat   !sub
-               : !subst       !substr      !tail        !tolower     !toupper
-               : !xor
+               : !match       !mul         !ne          !not         !or
+               : !range       !repr        !setdagarg   !setdagname  !setdagop
+               : !shl         !size        !sra         !srl         !strconcat
+               : !sub         !subst       !substr      !tail        !tolower
+               : !toupper     !xor
 
 The ``!cond`` operator has a slightly different
 syntax compared to other bang operators, so it is defined separately:
@@ -1878,6 +1878,10 @@ and non-0 as true.
     This operator produces 1 if *a* is less than *b*; 0 otherwise.
     The arguments must be ``bit``, ``bits``, ``int``, or ``string`` values.
 
+``!match(``\ *str*\ ``,`` *regex*\ ``)``
+    This operator produces 1 if the *str* matches the regular expression
+    *regex*. The format of *regex* is ERE (Extended POSIX Regular Expressions).
+
 ``!mul(``\ *a*\ ``,`` *b*\ ``, ...)``
     This operator multiplies *a*, *b*, etc., and produces the product.
 

diff  --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 334007524c954..ae505631b5433 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -910,6 +910,7 @@ class BinOpInit final : public OpInit, public FoldingSetNode {
     STRCONCAT,
     INTERLEAVE,
     CONCAT,
+    MATCH,
     EQ,
     NE,
     LE,

diff  --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 590656786bc66..75160e61f3f99 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
@@ -1318,6 +1319,23 @@ const Init *BinOpInit::Fold(const Record *CurRec) const {
     }
     break;
   }
+  case MATCH: {
+    const auto *StrInit = dyn_cast<StringInit>(LHS);
+    if (!StrInit)
+      return this;
+
+    const auto *RegexInit = dyn_cast<StringInit>(RHS);
+    if (!RegexInit)
+      return this;
+
+    StringRef RegexStr = RegexInit->getValue();
+    llvm::Regex Matcher(RegexStr);
+    if (!Matcher.isValid())
+      PrintFatalError(Twine("invalid regex '") + RegexStr + Twine("'"));
+
+    return BitInit::get(LHS->getRecordKeeper(),
+                        Matcher.match(StrInit->getValue()));
+  }
   case LISTCONCAT: {
     const auto *LHSs = dyn_cast<ListInit>(LHS);
     const auto *RHSs = dyn_cast<ListInit>(RHS);
@@ -1586,6 +1604,9 @@ std::string BinOpInit::getAsString() const {
   case RANGEC:
     return LHS->getAsString() + "..." + RHS->getAsString();
   case CONCAT: Result = "!con"; break;
+  case MATCH:
+    Result = "!match";
+    break;
   case ADD: Result = "!add"; break;
   case SUB: Result = "!sub"; break;
   case MUL: Result = "!mul"; break;

diff  --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 983242ade0fe5..0b2f927446b1e 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -644,6 +644,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
           .Case("tolower", tgtok::XToLower)
           .Case("toupper", tgtok::XToUpper)
           .Case("repr", tgtok::XRepr)
+          .Case("match", tgtok::XMatch)
           .Default(tgtok::Error);
 
   return Kind != tgtok::Error ? Kind

diff  --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 6680915211205..ef9205197decf 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -126,6 +126,7 @@ enum TokKind {
   XInterleave,
   XSubstr,
   XFind,
+  XMatch,
   XCast,
   XSubst,
   XForEach,

diff  --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 9a8301cffb930..787c3e64beac2 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -1456,6 +1456,7 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
   }
 
   case tgtok::XConcat:
+  case tgtok::XMatch:
   case tgtok::XADD:
   case tgtok::XSUB:
   case tgtok::XMUL:
@@ -1488,6 +1489,9 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
     switch (OpTok) {
     default: llvm_unreachable("Unhandled code!");
     case tgtok::XConcat: Code = BinOpInit::CONCAT; break;
+    case tgtok::XMatch:
+      Code = BinOpInit::MATCH;
+      break;
     case tgtok::XADD:    Code = BinOpInit::ADD; break;
     case tgtok::XSUB:    Code = BinOpInit::SUB; break;
     case tgtok::XMUL:    Code = BinOpInit::MUL; break;
@@ -1525,6 +1529,10 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
     switch (OpTok) {
     default:
       llvm_unreachable("Unhandled code!");
+    case tgtok::XMatch:
+      Type = BitRecTy::get(Records);
+      ArgType = StringRecTy::get(Records);
+      break;
     case tgtok::XConcat:
     case tgtok::XSetDagOp:
       Type = DagRecTy::get(Records);

diff  --git a/llvm/test/TableGen/match.td b/llvm/test/TableGen/match.td
new file mode 100644
index 0000000000000..743aebd130b0a
--- /dev/null
+++ b/llvm/test/TableGen/match.td
@@ -0,0 +1,36 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
+// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s
+// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s
+// RUN: not llvm-tblgen -DERROR4 %s 2>&1 | FileCheck --check-prefix=ERROR4 %s
+// XFAIL: vg_leak
+
+def test {
+  bit test0 = !match("123 abc ABC", "[0-9 a-z A-Z]+");
+  bit test1 = !match("abc", "[0-9]+");
+}
+
+// CHECK-LABEL: def test {
+// CHECK-NEXT:    bit test0 = 1;
+// CHECK-NEXT:    bit test1 = 0;
+// CHECK-NEXT:  }
+
+#ifdef ERROR1
+defvar error1 = !match(123, ".*");
+// ERROR1: error: expected value of type 'string', got 'int'
+#endif
+
+#ifdef ERROR2
+defvar error2 = !match("abc", 123);
+// ERROR2: error: expected value of type 'string', got 'int'
+#endif
+
+#ifdef ERROR3
+defvar error3 = !match("abc", "abc", "abc");
+// ERROR3: error: expected two operands to operator
+#endif
+
+#ifdef ERROR4
+defvar error4 = !match("abc", "([)]");
+// ERROR4: error: invalid regex '([)]'
+#endif


        


More information about the llvm-commits mailing list