[llvm] [TableGen] Add `!select` operator to select records (PR #129680)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 12 02:52:32 PDT 2025


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/129680

>From 69b0b7bb031e0068ee87e51c8f1c7f5ca967ed1e Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 12 Mar 2025 17:33:02 +0800
Subject: [PATCH] [TableGen] Add `!defined` operator to get defined records

The format is: `!defined<T>([regex])`.

This operator produces a list of the defined records of type `T`,
including records of classes derived from it. If `regex` is provided,
only records whose name matches the regular expression are included.
---
 llvm/docs/TableGen/ProgRef.rst      | 25 +++++++-----
 llvm/include/llvm/TableGen/Record.h | 36 +++++++++++++++++
 llvm/lib/TableGen/Record.cpp        | 61 +++++++++++++++++++++++++++++
 llvm/lib/TableGen/TGLexer.cpp       |  1 +
 llvm/lib/TableGen/TGLexer.h         |  1 +
 llvm/lib/TableGen/TGParser.cpp      | 43 ++++++++++++++++++++
 llvm/test/TableGen/defined.td       | 60 ++++++++++++++++++++++++++++
 7 files changed, 217 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/TableGen/defined.td

diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index edb97109c9289..b6cf3ee191c5d 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -220,16 +220,16 @@ TableGen provides "bang operators" that have a wide variety of uses:
 .. productionlist::
    BangOperator: one of
                : !add         !and         !cast        !con         !dag
-               : !div         !empty       !eq          !exists      !filter
-               : !find        !foldl       !foreach     !ge          !getdagarg
-               : !getdagname  !getdagop    !gt          !head        !if
-               : !initialized !interleave  !isa         !le          !listconcat
-               : !listflatten !listremove  !listsplat   !logtwo      !lt
-               : !mul         !ne          !not         !or          !range
-               : !repr        !setdagarg   !setdagname  !setdagop    !shl
-               : !size        !sra         !srl         !strconcat   !sub
-               : !subst       !substr      !tail        !tolower     !toupper
-               : !xor
+               : !defined     !div         !empty       !eq          !exists
+               : !filter      !find        !foldl       !foreach     !ge
+               : !getdagarg   !getdagname  !getdagop    !gt          !head
+               : !if          !initialized !interleave  !isa         !le
+               : !listconcat  !listflatten !listremove  !listsplat   !logtwo
+               : !lt          !mul         !ne          !not         !or
+               : !range       !repr        !setdagarg   !setdagname  !setdagop
+               : !shl         !size        !sra         !srl         !strconcat
+               : !sub         !subst       !substr      !tail        !tolower
+               : !toupper     !xor
 
 The ``!cond`` operator has a slightly different
 syntax compared to other bang operators, so it is defined separately:
@@ -1722,6 +1722,11 @@ and non-0 as true.
     Example: ``!dag(op, [a1, a2, ?], ["name1", "name2", "name3"])`` results in
     ``(op a1-value:$name1, a2-value:$name2, ?:$name3)``.
 
+``!defined<``\ *type*\ ``>([``\ *regex*\ ``])``
+    This operator produces a list of the defined records of type *type*,
+    including records of classes derived from it. If *regex* is provided, only
+    records whose name matches the regular expression *regex* are included.
+
 ``!div(``\ *a*\ ``,`` *b*\ ``)``
     This operator performs signed division of *a* by *b*, and produces the quotient.
     Division by 0 produces an error. Division of INT64_MIN by -1 produces an error.
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 334007524c954..9d70d18551ad8 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -316,6 +316,7 @@ class Init {
     IK_FoldOpInit,
     IK_IsAOpInit,
     IK_ExistsOpInit,
+    IK_DefinedOpInit,
     IK_AnonymousNameInit,
     IK_StringInit,
     IK_VarInit,
@@ -1191,6 +1192,41 @@ class ExistsOpInit final : public TypedInit, public FoldingSetNode {
   std::string getAsString() const override;
 };
 
+/// !defined<type>([regex]) - Produces the list of defined records of `type`;
+/// the value itself has type list<type>. If `regex` is provided, only records
+/// whose name matches the regular expression `regex` are included.
+class DefinedOpInit final : public TypedInit, public FoldingSetNode {
+private:
+  const RecTy *Type; // Element type of the resulting list.
+  const Init *Regex; // Name filter; folded once it is a StringInit.
+
+  DefinedOpInit(const RecTy *Type, const Init *Regex)
+      : TypedInit(IK_DefinedOpInit, ListRecTy::get(Type)), Type(Type),
+        Regex(Regex) {}
+
+public:
+  DefinedOpInit(const DefinedOpInit &) = delete;
+  DefinedOpInit &operator=(const DefinedOpInit &) = delete;
+
+  static bool classof(const Init *I) {
+    return I->getKind() == IK_DefinedOpInit;
+  }
+
+  static const DefinedOpInit *get(const RecTy *Type, const Init *Regex);
+
+  void Profile(FoldingSetNodeID &ID) const;
+
+  const Init *Fold() const; // Returns the record list, or this if unfoldable.
+
+  bool isComplete() const override { return false; }
+
+  const Init *resolveReferences(Resolver &R) const override;
+
+  const Init *getBit(unsigned Bit) const override;
+
+  std::string getAsString() const override;
+};
+
 /// 'Opcode' - Represent a reference to an entire variable object.
 class VarInit final : public TypedInit {
   const Init *VarName;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 590656786bc66..a0c81660a3d32 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
@@ -83,6 +84,7 @@ struct RecordKeeperImpl {
   FoldingSet<FoldOpInit> TheFoldOpInitPool;
   FoldingSet<IsAOpInit> TheIsAOpInitPool;
   FoldingSet<ExistsOpInit> TheExistsOpInitPool;
+  FoldingSet<DefinedOpInit> TheDefinedOpInitPool;
   DenseMap<std::pair<const RecTy *, const Init *>, VarInit *> TheVarInitPool;
   DenseMap<std::pair<const TypedInit *, unsigned>, VarBitInit *>
       TheVarBitInitPool;
@@ -2199,6 +2201,65 @@ std::string ExistsOpInit::getAsString() const {
       .str();
 }
 
+static void ProfileDefinedOpInit(FoldingSetNodeID &ID, const RecTy *Type,
+                                 const Init *Regex) {
+  ID.AddPointer(Type); // Folding-set key: operand identity, in this order.
+  ID.AddPointer(Regex);
+}
+
+const DefinedOpInit *DefinedOpInit::get(const RecTy *Type, const Init *Regex) {
+  FoldingSetNodeID ID;
+  ProfileDefinedOpInit(ID, Type, Regex);
+
+  detail::RecordKeeperImpl &RK = Regex->getRecordKeeper().getImpl();
+  void *IP = nullptr; // Insert position, set by the lookup below.
+  if (const DefinedOpInit *I =
+          RK.TheDefinedOpInitPool.FindNodeOrInsertPos(ID, IP))
+    return I; // Already uniqued: hand back the pooled node.
+
+  DefinedOpInit *I = new (RK.Allocator) DefinedOpInit(Type, Regex);
+  RK.TheDefinedOpInitPool.InsertNode(I, IP);
+  return I;
+}
+
+void DefinedOpInit::Profile(FoldingSetNodeID &ID) const {
+  ProfileDefinedOpInit(ID, Type, Regex); // Same key that get() looks up.
+}
+
+const Init *DefinedOpInit::Fold() const {
+  const auto *RegexInit = dyn_cast<StringInit>(Regex);
+  if (!RegexInit)
+    return this; // Regex is not a plain string (yet): stay unfolded.
+
+  StringRef RegexStr = RegexInit->getValue();
+  llvm::Regex Matcher(RegexStr);
+  if (!Matcher.isValid()) // A malformed pattern aborts TableGen.
+    PrintFatalError(Twine("invalid regex '") + RegexStr + Twine("'"));
+
+  const RecordKeeper &RK = Type->getRecordKeeper();
+  SmallVector<Init *, 8> Selected;
+  for (auto &Def : RK.getAllDerivedDefinitionsIfDefined(Type->getAsString()))
+    if (Matcher.match(Def->getName())) // Keep only defs whose name matches.
+      Selected.push_back(Def->getDefInit());
+
+  return ListInit::get(Selected, Type); // A list<Type>, possibly empty.
+}
+
+const Init *DefinedOpInit::resolveReferences(Resolver &R) const {
+  const Init *NewRegex = Regex->resolveReferences(R);
+  if (Regex != NewRegex)
+    return get(Type, NewRegex)->Fold(); // Regex changed: rebuild and refold.
+  return this; // Nothing resolved; keep the existing node.
+}
+
+const Init *DefinedOpInit::getBit(unsigned Bit) const {
+  return VarBitInit::get(this, Bit); // Bit reference into this value.
+}
+
+std::string DefinedOpInit::getAsString() const { // Source-like spelling.
+  return "!defined<" + Type->getAsString() + ">(" + Regex->getAsString() + ")";
+}
+
 const RecTy *TypedInit::getFieldType(const StringInit *FieldName) const {
   if (const auto *RecordType = dyn_cast<RecordRecTy>(getType())) {
     for (const Record *Rec : RecordType->getClasses()) {
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 983242ade0fe5..b777bf1307f55 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -619,6 +619,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
           .Case("sra", tgtok::XSRA)
           .Case("srl", tgtok::XSRL)
           .Case("cast", tgtok::XCast)
+          .Case("defined", tgtok::XDefined)
           .Case("empty", tgtok::XEmpty)
           .Case("subst", tgtok::XSubst)
           .Case("foldl", tgtok::XFoldl)
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 6680915211205..b04bef5074611 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -134,6 +134,7 @@ enum TokKind {
   XHead,
   XTail,
   XSize,
+  XDefined,
   XEmpty,
   XInitialized,
   XIf,
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 9a8301cffb930..d6ef4cadc68ce 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -1455,6 +1455,49 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
     return (ExistsOpInit::get(Type, Expr))->Fold(CurRec);
   }
 
+  case tgtok::XDefined: {
+    // Value ::= !defined '<' Type '>' '(' Regex? ')'
+    Lex.Lex(); // eat the operation.
+
+    const RecTy *Type = ParseOperatorType();
+    if (!Type)
+      return nullptr;
+
+    if (!consume(tgtok::l_paren)) {
+      TokError("expected '(' after type of !defined");
+      return nullptr;
+    }
+
+    // The Regex operand is optional; an empty operand list selects all names.
+    const Init *Regex;
+    if (Lex.getCode() != tgtok::r_paren) {
+      SMLoc RegexLoc = Lex.getLoc();
+      Regex = ParseValue(CurRec);
+      // ParseValue returns null after reporting an error; bail out before the
+      // dyn_cast below, which must not be handed a null pointer.
+      if (!Regex)
+        return nullptr;
+
+      // Only a typed value of string type is accepted as the regex.
+      const auto *RegexType = dyn_cast<TypedInit>(Regex);
+      if (!RegexType || !isa<StringRecTy>(RegexType->getType())) {
+        Error(RegexLoc, "expected string type argument in !defined operator");
+        return nullptr;
+      }
+    } else {
+      // Use wildcard when Regex is not specified.
+      Regex = StringInit::get(Records, ".*");
+    }
+
+    if (!consume(tgtok::r_paren)) {
+      TokError("expected ')' in !defined");
+      return nullptr;
+    }
+
+    // Folds now if Regex is a string, i.e. over the records defined so far.
+    return (DefinedOpInit::get(Type, Regex))->Fold();
+  }
+
   case tgtok::XConcat:
   case tgtok::XADD:
   case tgtok::XSUB:
diff --git a/llvm/test/TableGen/defined.td b/llvm/test/TableGen/defined.td
new file mode 100644
index 0000000000000..7a14f75df5b34
--- /dev/null
+++ b/llvm/test/TableGen/defined.td
@@ -0,0 +1,60 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
+// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s
+// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s
+// XFAIL: vg_leak
+
+class A;
+def a0 : A;
+def a1 : A;
+
+class B : A; // B derives from A, so b0 and b1 are also records of type A.
+def b0 : B;
+def b1 : B;
+
+def defined_A {
+  list<A> defined = !defined<A>(); // No regex: every record of type A.
+}
+
+def defined_A_x0 {
+  list<A> defined = !defined<A>(".*0"); // Selects a0 and b0.
+}
+
+def defined_A_x1 {
+  list<A> defined = !defined<A>(".*1"); // Selects a1 and b1.
+}
+
+def defined_B {
+  list<B> defined = !defined<B>(); // Only b0 and b1 are of type B.
+}
+
+// CHECK-LABEL: def defined_A {
+// CHECK-NEXT:    list<A> defined = [a0, a1, b0, b1];
+// CHECK-NEXT:  }
+
+// CHECK-LABEL: def defined_A_x0 {
+// CHECK-NEXT:    list<A> defined = [a0, b0];
+// CHECK-NEXT:  }
+
+// CHECK-LABEL: def defined_A_x1 {
+// CHECK-NEXT:    list<A> defined = [a1, b1];
+// CHECK-NEXT:  }
+
+// CHECK-LABEL: def defined_B {
+// CHECK-NEXT:    list<B> defined = [b0, b1];
+// CHECK-NEXT:  }
+
+#ifdef ERROR1
+defvar error1 = !defined<A>(123) // Operand is an int, not a string.
+// ERROR1: error: expected string type argument in !defined operator
+#endif
+
+#ifdef ERROR2
+defvar error2 = !defined<1>("") // 1 is not a type.
+// ERROR2: error: Unknown token when expecting a type
+#endif
+
+#ifdef ERROR3
+defvar error3 = !defined<A>("([)]") // Malformed regular expression.
+// ERROR3: error: invalid regex '([)]'
+#endif



More information about the llvm-commits mailing list