[llvm] [TableGen] Add `!instances` operator to get defined records (PR #129680)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 01:26:59 PDT 2025


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/129680

>From 9f3d31bf1f3e7c3dc2c84fe8fad7a2291cb0de1a Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 12 Mar 2025 17:33:02 +0800
Subject: [PATCH 1/5] [TableGen] Add `!records` operator to get defined records

The format is: `!records<T>([regex])`.

This operator produces a list of the defined records that are instances
of class `T`, including records of classes derived from `T`. If `regex`
is provided, only records whose names match the regular expression
`regex` are included. The format of `regex` is ERE (Extended POSIX
Regular Expressions).
---
 llvm/docs/TableGen/ProgRef.rst      | 14 +++++--
 llvm/include/llvm/TableGen/Record.h | 36 +++++++++++++++++
 llvm/lib/TableGen/Record.cpp        | 60 +++++++++++++++++++++++++++++
 llvm/lib/TableGen/TGLexer.cpp       |  1 +
 llvm/lib/TableGen/TGLexer.h         |  1 +
 llvm/lib/TableGen/TGParser.cpp      | 43 +++++++++++++++++++++
 llvm/test/TableGen/records.td       | 60 +++++++++++++++++++++++++++++
 7 files changed, 211 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/TableGen/records.td

diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 0983c6283f7e2..48d3e9a2e1f3c 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -226,10 +226,10 @@ TableGen provides "bang operators" that have a wide variety of uses:
                : !initialized !interleave  !isa         !le          !listconcat
                : !listflatten !listremove  !listsplat   !logtwo      !lt
                : !match       !mul         !ne          !not         !or
-               : !range       !repr        !setdagarg   !setdagname  !setdagop
-               : !shl         !size        !sra         !srl         !strconcat
-               : !sub         !subst       !substr      !tail        !tolower
-               : !toupper     !xor
+               : !range       !records     !repr        !setdagarg   !setdagname
+               : !setdagop    !shl         !size        !sra         !srl
+               : !strconcat   !sub         !subst       !substr      !tail
+               : !tolower     !toupper     !xor
 
 The ``!cond`` operator has a slightly different
 syntax compared to other bang operators, so it is defined separately:
@@ -1920,6 +1920,12 @@ and non-0 as true.
 ``!range(``\ *list*\ ``)``
     Equivalent to ``!range(0, !size(list))``.
 
+``!records<``\ *type*\ ``>([``\ *regex*\ ``])``
+    This operator produces a list of records whose type is *type*. If *regex*
+    is provided, only records whose name matches the regular expression *regex*
+    will be included. The format of *regex* is ERE (Extended POSIX Regular
+    Expressions).
+
 ``!repr(``\ *value*\ ``)``
     Represents *value* as a string. String format for the value is not
     guaranteed to be stable. Intended for debugging purposes only.
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index ae505631b5433..3034663ad54bb 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -316,6 +316,7 @@ class Init {
     IK_FoldOpInit,
     IK_IsAOpInit,
     IK_ExistsOpInit,
+    IK_RecordsOpInit,
     IK_AnonymousNameInit,
     IK_StringInit,
     IK_VarInit,
@@ -1192,6 +1193,41 @@ class ExistsOpInit final : public TypedInit, public FoldingSetNode {
   std::string getAsString() const override;
 };
 
+/// !records<type>([regex]) - Produces a list of records whose type is `type`.
+/// If `regex` is provided, only records whose name matches the regular
+/// expression `regex` will be included.
+class RecordsOpInit final : public TypedInit, public FoldingSetNode {
+private:
+  const RecTy *Type;
+  const Init *Regex;
+
+  RecordsOpInit(const RecTy *Type, const Init *Regex)
+      : TypedInit(IK_RecordsOpInit, ListRecTy::get(Type)), Type(Type),
+        Regex(Regex) {}
+
+public:
+  RecordsOpInit(const RecordsOpInit &) = delete;
+  RecordsOpInit &operator=(const RecordsOpInit &) = delete;
+
+  static bool classof(const Init *I) {
+    return I->getKind() == IK_RecordsOpInit;
+  }
+
+  static const RecordsOpInit *get(const RecTy *Type, const Init *Regex);
+
+  void Profile(FoldingSetNodeID &ID) const;
+
+  const Init *Fold() const;
+
+  bool isComplete() const override { return false; }
+
+  const Init *resolveReferences(Resolver &R) const override;
+
+  const Init *getBit(unsigned Bit) const override;
+
+  std::string getAsString() const override;
+};
+
 /// 'Opcode' - Represent a reference to an entire variable object.
 class VarInit final : public TypedInit {
   const Init *VarName;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index c5b9b670b6f42..da95814866d75 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -84,6 +84,7 @@ struct RecordKeeperImpl {
   FoldingSet<FoldOpInit> TheFoldOpInitPool;
   FoldingSet<IsAOpInit> TheIsAOpInitPool;
   FoldingSet<ExistsOpInit> TheExistsOpInitPool;
+  FoldingSet<RecordsOpInit> TheRecordsOpInitPool;
   DenseMap<std::pair<const RecTy *, const Init *>, VarInit *> TheVarInitPool;
   DenseMap<std::pair<const TypedInit *, unsigned>, VarBitInit *>
       TheVarBitInitPool;
@@ -2222,6 +2223,65 @@ std::string ExistsOpInit::getAsString() const {
       .str();
 }
 
+static void ProfileRecordsOpInit(FoldingSetNodeID &ID, const RecTy *Type,
+                                 const Init *Regex) {
+  ID.AddPointer(Type);
+  ID.AddPointer(Regex);
+}
+
+const RecordsOpInit *RecordsOpInit::get(const RecTy *Type, const Init *Regex) {
+  FoldingSetNodeID ID;
+  ProfileRecordsOpInit(ID, Type, Regex);
+
+  detail::RecordKeeperImpl &RK = Regex->getRecordKeeper().getImpl();
+  void *IP = nullptr;
+  if (const RecordsOpInit *I =
+          RK.TheRecordsOpInitPool.FindNodeOrInsertPos(ID, IP))
+    return I;
+
+  RecordsOpInit *I = new (RK.Allocator) RecordsOpInit(Type, Regex);
+  RK.TheRecordsOpInitPool.InsertNode(I, IP);
+  return I;
+}
+
+void RecordsOpInit::Profile(FoldingSetNodeID &ID) const {
+  ProfileRecordsOpInit(ID, Type, Regex);
+}
+
+const Init *RecordsOpInit::Fold() const {
+  const auto *RegexInit = dyn_cast<StringInit>(Regex);
+  if (!RegexInit)
+    return this;
+
+  StringRef RegexStr = RegexInit->getValue();
+  llvm::Regex Matcher(RegexStr);
+  if (!Matcher.isValid())
+    PrintFatalError(Twine("invalid regex '") + RegexStr + Twine("'"));
+
+  const RecordKeeper &RK = Type->getRecordKeeper();
+  SmallVector<Init *, 8> Selected;
+  for (auto &Def : RK.getAllDerivedDefinitionsIfDefined(Type->getAsString()))
+    if (Matcher.match(Def->getName()))
+      Selected.push_back(Def->getDefInit());
+
+  return ListInit::get(Selected, Type);
+}
+
+const Init *RecordsOpInit::resolveReferences(Resolver &R) const {
+  const Init *NewRegex = Regex->resolveReferences(R);
+  if (Regex != NewRegex)
+    return get(Type, NewRegex)->Fold();
+  return this;
+}
+
+const Init *RecordsOpInit::getBit(unsigned Bit) const {
+  return VarBitInit::get(this, Bit);
+}
+
+std::string RecordsOpInit::getAsString() const {
+  return "!records<" + Type->getAsString() + ">(" + Regex->getAsString() + ")";
+}
+
 const RecTy *TypedInit::getFieldType(const StringInit *FieldName) const {
   if (const auto *RecordType = dyn_cast<RecordRecTy>(getType())) {
     for (const Record *Rec : RecordType->getClasses()) {
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 0b2f927446b1e..2b158dd4ec80f 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -629,6 +629,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
           .Case("listsplat", tgtok::XListSplat)
           .Case("listremove", tgtok::XListRemove)
           .Case("range", tgtok::XRange)
+          .Case("records", tgtok::XRecords)
           .Case("strconcat", tgtok::XStrConcat)
           .Case("initialized", tgtok::XInitialized)
           .Case("interleave", tgtok::XInterleave)
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index ef9205197decf..76b5bdacf39c3 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -154,6 +154,7 @@ enum TokKind {
   XToLower,
   XToUpper,
   XRange,
+  XRecords,
   XGetDagArg,
   XGetDagName,
   XSetDagArg,
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 787c3e64beac2..eb6001afada19 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -1455,6 +1455,49 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
     return (ExistsOpInit::get(Type, Expr))->Fold(CurRec);
   }
 
+  case tgtok::XRecords: {
+    // Value ::= !records '<' Type '>' '(' Regex? ')'
+    Lex.Lex(); // eat the operation.
+
+    const RecTy *Type = ParseOperatorType();
+    if (!Type)
+      return nullptr;
+
+    if (!consume(tgtok::l_paren)) {
+      TokError("expected '(' after type of !records");
+      return nullptr;
+    }
+
+    // The Regex can be optional.
+    const Init *Regex;
+    if (Lex.getCode() != tgtok::r_paren) {
+      SMLoc RegexLoc = Lex.getLoc();
+      Regex = ParseValue(CurRec);
+
+      const auto *RegexType = dyn_cast<TypedInit>(Regex);
+      if (!RegexType) {
+        Error(RegexLoc, "expected string type argument in !records operator");
+        return nullptr;
+      }
+
+      const auto *SType = dyn_cast<StringRecTy>(RegexType->getType());
+      if (!SType) {
+        Error(RegexLoc, "expected string type argument in !records operator");
+        return nullptr;
+      }
+    } else {
+      // Use wildcard when Regex is not specified.
+      Regex = StringInit::get(Records, ".*");
+    }
+
+    if (!consume(tgtok::r_paren)) {
+      TokError("expected ')' in !records");
+      return nullptr;
+    }
+
+    return RecordsOpInit::get(Type, Regex)->Fold();
+  }
+
   case tgtok::XConcat:
   case tgtok::XMatch:
   case tgtok::XADD:
diff --git a/llvm/test/TableGen/records.td b/llvm/test/TableGen/records.td
new file mode 100644
index 0000000000000..0ba402cf62101
--- /dev/null
+++ b/llvm/test/TableGen/records.td
@@ -0,0 +1,60 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
+// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s
+// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s
+// XFAIL: vg_leak
+
+class A;
+def a0 : A;
+def a1 : A;
+
+class B : A;
+def b0 : B;
+def b1 : B;
+
+def records_A {
+  list<A> records = !records<A>();
+}
+
+def records_A_x0 {
+  list<A> records = !records<A>(".*0");
+}
+
+def records_A_x1 {
+  list<A> records = !records<A>(".*1");
+}
+
+def records_B {
+  list<B> records = !records<B>();
+}
+
+// CHECK-LABEL: def records_A {
+// CHECK-NEXT:    list<A> records = [a0, a1, b0, b1];
+// CHECK-NEXT:  }
+
+// CHECK-LABEL: def records_A_x0 {
+// CHECK-NEXT:    list<A> records = [a0, b0];
+// CHECK-NEXT:  }
+
+// CHECK-LABEL: def records_A_x1 {
+// CHECK-NEXT:    list<A> records = [a1, b1];
+// CHECK-NEXT:  }
+
+// CHECK-LABEL: def records_B {
+// CHECK-NEXT:    list<B> records = [b0, b1];
+// CHECK-NEXT:  }
+
+#ifdef ERROR1
+defvar error1 = !records<A>(123)
+// ERROR1: error: expected string type argument in !records operator
+#endif
+
+#ifdef ERROR2
+defvar error2 = !records<1>("")
+// ERROR2: error: Unknown token when expecting a type
+#endif
+
+#ifdef ERROR3
+defvar error3 = !records<A>("([)]")
+// ERROR3: error: invalid regex '([)]'
+#endif

>From 57627cecc90cbe75defcdbb68db0b59f03a7487b Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 18 Mar 2025 21:05:28 +0800
Subject: [PATCH 2/5] Rename to instances

---
 llvm/docs/TableGen/ProgRef.rst      | 20 +++++++--------
 llvm/include/llvm/TableGen/Record.h | 18 ++++++-------
 llvm/lib/TableGen/Record.cpp        | 34 ++++++++++++------------
 llvm/lib/TableGen/TGLexer.cpp       |  2 +-
 llvm/lib/TableGen/TGLexer.h         |  2 +-
 llvm/lib/TableGen/TGParser.cpp      | 14 +++++-----
 llvm/test/TableGen/records.td       | 40 ++++++++++++++---------------
 7 files changed, 66 insertions(+), 64 deletions(-)

diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 48d3e9a2e1f3c..d68fe6dd95985 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -223,10 +223,10 @@ TableGen provides "bang operators" that have a wide variety of uses:
                : !div         !empty       !eq          !exists      !filter
                : !find        !foldl       !foreach     !ge          !getdagarg
                : !getdagname  !getdagop    !gt          !head        !if
-               : !initialized !interleave  !isa         !le          !listconcat
-               : !listflatten !listremove  !listsplat   !logtwo      !lt
-               : !match       !mul         !ne          !not         !or
-               : !range       !records     !repr        !setdagarg   !setdagname
+               : !initialized !instances   !interleave  !isa         !le
+               : !listconcat  !listflatten !listremove  !listsplat   !logtwo
+               : !lt          !match       !mul         !ne          !not
+               : !or          !range       !repr        !setdagarg   !setdagname
                : !setdagop    !shl         !size        !sra         !srl
                : !strconcat   !sub         !subst       !substr      !tail
                : !tolower     !toupper     !xor
@@ -1836,6 +1836,12 @@ and non-0 as true.
   This operator produces 1 if *a* is not the uninitialized value (``?``) and 0
   otherwise.
 
+``!instances<``\ *type*\ ``>([``\ *regex*\ ``])``
+    This operator produces a list of records whose type is *type*. If *regex*
+    is provided, only records whose name matches the regular expression *regex*
+    will be included. The format of *regex* is ERE (Extended POSIX Regular
+    Expressions).
+
 ``!interleave(``\ *list*\ ``,`` *delim*\ ``)``
     This operator concatenates the items in the *list*, interleaving the
     *delim* string between each pair, and produces the resulting string.
@@ -1920,12 +1926,6 @@ and non-0 as true.
 ``!range(``\ *list*\ ``)``
     Equivalent to ``!range(0, !size(list))``.
 
-``!records<``\ *type*\ ``>([``\ *regex*\ ``])``
-    This operator produces a list of records whose type is *type*. If *regex*
-    is provided, only records whose name matches the regular expression *regex*
-    will be included. The format of *regex* is ERE (Extended POSIX Regular
-    Expressions).
-
 ``!repr(``\ *value*\ ``)``
     Represents *value* as a string. String format for the value is not
     guaranteed to be stable. Intended for debugging purposes only.
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 3034663ad54bb..5fc8fb480e76d 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -316,7 +316,7 @@ class Init {
     IK_FoldOpInit,
     IK_IsAOpInit,
     IK_ExistsOpInit,
-    IK_RecordsOpInit,
+    IK_InstancesOpInit,
     IK_AnonymousNameInit,
     IK_StringInit,
     IK_VarInit,
@@ -1193,27 +1193,27 @@ class ExistsOpInit final : public TypedInit, public FoldingSetNode {
   std::string getAsString() const override;
 };
 
-/// !records<type>([regex]) - Produces a list of records whose type is `type`.
+/// !instances<type>([regex]) - Produces a list of records whose type is `type`.
 /// If `regex` is provided, only records whose name matches the regular
 /// expression `regex` will be included.
-class RecordsOpInit final : public TypedInit, public FoldingSetNode {
+class InstancesOpInit final : public TypedInit, public FoldingSetNode {
 private:
   const RecTy *Type;
   const Init *Regex;
 
-  RecordsOpInit(const RecTy *Type, const Init *Regex)
-      : TypedInit(IK_RecordsOpInit, ListRecTy::get(Type)), Type(Type),
+  InstancesOpInit(const RecTy *Type, const Init *Regex)
+      : TypedInit(IK_InstancesOpInit, ListRecTy::get(Type)), Type(Type),
         Regex(Regex) {}
 
 public:
-  RecordsOpInit(const RecordsOpInit &) = delete;
-  RecordsOpInit &operator=(const RecordsOpInit &) = delete;
+  InstancesOpInit(const InstancesOpInit &) = delete;
+  InstancesOpInit &operator=(const InstancesOpInit &) = delete;
 
   static bool classof(const Init *I) {
-    return I->getKind() == IK_RecordsOpInit;
+    return I->getKind() == IK_InstancesOpInit;
   }
 
-  static const RecordsOpInit *get(const RecTy *Type, const Init *Regex);
+  static const InstancesOpInit *get(const RecTy *Type, const Init *Regex);
 
   void Profile(FoldingSetNodeID &ID) const;
 
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index da95814866d75..7dab201c7c500 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -84,7 +84,7 @@ struct RecordKeeperImpl {
   FoldingSet<FoldOpInit> TheFoldOpInitPool;
   FoldingSet<IsAOpInit> TheIsAOpInitPool;
   FoldingSet<ExistsOpInit> TheExistsOpInitPool;
-  FoldingSet<RecordsOpInit> TheRecordsOpInitPool;
+  FoldingSet<InstancesOpInit> TheInstancesOpInitPool;
   DenseMap<std::pair<const RecTy *, const Init *>, VarInit *> TheVarInitPool;
   DenseMap<std::pair<const TypedInit *, unsigned>, VarBitInit *>
       TheVarBitInitPool;
@@ -2223,32 +2223,33 @@ std::string ExistsOpInit::getAsString() const {
       .str();
 }
 
-static void ProfileRecordsOpInit(FoldingSetNodeID &ID, const RecTy *Type,
-                                 const Init *Regex) {
+static void ProfileInstancesOpInit(FoldingSetNodeID &ID, const RecTy *Type,
+                                   const Init *Regex) {
   ID.AddPointer(Type);
   ID.AddPointer(Regex);
 }
 
-const RecordsOpInit *RecordsOpInit::get(const RecTy *Type, const Init *Regex) {
+const InstancesOpInit *InstancesOpInit::get(const RecTy *Type,
+                                            const Init *Regex) {
   FoldingSetNodeID ID;
-  ProfileRecordsOpInit(ID, Type, Regex);
+  ProfileInstancesOpInit(ID, Type, Regex);
 
   detail::RecordKeeperImpl &RK = Regex->getRecordKeeper().getImpl();
   void *IP = nullptr;
-  if (const RecordsOpInit *I =
-          RK.TheRecordsOpInitPool.FindNodeOrInsertPos(ID, IP))
+  if (const InstancesOpInit *I =
+          RK.TheInstancesOpInitPool.FindNodeOrInsertPos(ID, IP))
     return I;
 
-  RecordsOpInit *I = new (RK.Allocator) RecordsOpInit(Type, Regex);
-  RK.TheRecordsOpInitPool.InsertNode(I, IP);
+  InstancesOpInit *I = new (RK.Allocator) InstancesOpInit(Type, Regex);
+  RK.TheInstancesOpInitPool.InsertNode(I, IP);
   return I;
 }
 
-void RecordsOpInit::Profile(FoldingSetNodeID &ID) const {
-  ProfileRecordsOpInit(ID, Type, Regex);
+void InstancesOpInit::Profile(FoldingSetNodeID &ID) const {
+  ProfileInstancesOpInit(ID, Type, Regex);
 }
 
-const Init *RecordsOpInit::Fold() const {
+const Init *InstancesOpInit::Fold() const {
   const auto *RegexInit = dyn_cast<StringInit>(Regex);
   if (!RegexInit)
     return this;
@@ -2267,19 +2268,20 @@ const Init *RecordsOpInit::Fold() const {
   return ListInit::get(Selected, Type);
 }
 
-const Init *RecordsOpInit::resolveReferences(Resolver &R) const {
+const Init *InstancesOpInit::resolveReferences(Resolver &R) const {
   const Init *NewRegex = Regex->resolveReferences(R);
   if (Regex != NewRegex)
     return get(Type, NewRegex)->Fold();
   return this;
 }
 
-const Init *RecordsOpInit::getBit(unsigned Bit) const {
+const Init *InstancesOpInit::getBit(unsigned Bit) const {
   return VarBitInit::get(this, Bit);
 }
 
-std::string RecordsOpInit::getAsString() const {
-  return "!records<" + Type->getAsString() + ">(" + Regex->getAsString() + ")";
+std::string InstancesOpInit::getAsString() const {
+  return "!instances<" + Type->getAsString() + ">(" + Regex->getAsString() +
+         ")";
 }
 
 const RecTy *TypedInit::getFieldType(const StringInit *FieldName) const {
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 2b158dd4ec80f..4da2505cc065a 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -629,10 +629,10 @@ tgtok::TokKind TGLexer::LexExclaim() {
           .Case("listsplat", tgtok::XListSplat)
           .Case("listremove", tgtok::XListRemove)
           .Case("range", tgtok::XRange)
-          .Case("records", tgtok::XRecords)
           .Case("strconcat", tgtok::XStrConcat)
           .Case("initialized", tgtok::XInitialized)
           .Case("interleave", tgtok::XInterleave)
+          .Case("instances", tgtok::XInstances)
           .Case("substr", tgtok::XSubstr)
           .Case("find", tgtok::XFind)
           .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 76b5bdacf39c3..a5b10b37e9886 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -137,6 +137,7 @@ enum TokKind {
   XSize,
   XEmpty,
   XInitialized,
+  XInstances,
   XIf,
   XCond,
   XEq,
@@ -154,7 +155,6 @@ enum TokKind {
   XToLower,
   XToUpper,
   XRange,
-  XRecords,
   XGetDagArg,
   XGetDagName,
   XSetDagArg,
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index eb6001afada19..c59875817e4ec 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -1455,8 +1455,8 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
     return (ExistsOpInit::get(Type, Expr))->Fold(CurRec);
   }
 
-  case tgtok::XRecords: {
-    // Value ::= !records '<' Type '>' '(' Regex? ')'
+  case tgtok::XInstances: {
+    // Value ::= !instances '<' Type '>' '(' Regex? ')'
     Lex.Lex(); // eat the operation.
 
     const RecTy *Type = ParseOperatorType();
@@ -1464,7 +1464,7 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
       return nullptr;
 
     if (!consume(tgtok::l_paren)) {
-      TokError("expected '(' after type of !records");
+      TokError("expected '(' after type of !instances");
       return nullptr;
     }
 
@@ -1476,13 +1476,13 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
 
       const auto *RegexType = dyn_cast<TypedInit>(Regex);
       if (!RegexType) {
-        Error(RegexLoc, "expected string type argument in !records operator");
+        Error(RegexLoc, "expected string type argument in !instances operator");
         return nullptr;
       }
 
       const auto *SType = dyn_cast<StringRecTy>(RegexType->getType());
       if (!SType) {
-        Error(RegexLoc, "expected string type argument in !records operator");
+        Error(RegexLoc, "expected string type argument in !instances operator");
         return nullptr;
       }
     } else {
@@ -1491,11 +1491,11 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
     }
 
     if (!consume(tgtok::r_paren)) {
-      TokError("expected ')' in !records");
+      TokError("expected ')' in !instances");
       return nullptr;
     }
 
-    return RecordsOpInit::get(Type, Regex)->Fold();
+    return InstancesOpInit::get(Type, Regex)->Fold();
   }
 
   case tgtok::XConcat:
diff --git a/llvm/test/TableGen/records.td b/llvm/test/TableGen/records.td
index 0ba402cf62101..6c6a359130aee 100644
--- a/llvm/test/TableGen/records.td
+++ b/llvm/test/TableGen/records.td
@@ -12,49 +12,49 @@ class B : A;
 def b0 : B;
 def b1 : B;
 
-def records_A {
-  list<A> records = !records<A>();
+def instances_A {
+  list<A> instances = !instances<A>();
 }
 
-def records_A_x0 {
-  list<A> records = !records<A>(".*0");
+def instances_A_x0 {
+  list<A> instances = !instances<A>(".*0");
 }
 
-def records_A_x1 {
-  list<A> records = !records<A>(".*1");
+def instances_A_x1 {
+  list<A> instances = !instances<A>(".*1");
 }
 
-def records_B {
-  list<B> records = !records<B>();
+def instances_B {
+  list<B> instances = !instances<B>();
 }
 
-// CHECK-LABEL: def records_A {
-// CHECK-NEXT:    list<A> records = [a0, a1, b0, b1];
+// CHECK-LABEL: def instances_A {
+// CHECK-NEXT:    list<A> instances = [a0, a1, b0, b1];
 // CHECK-NEXT:  }
 
-// CHECK-LABEL: def records_A_x0 {
-// CHECK-NEXT:    list<A> records = [a0, b0];
+// CHECK-LABEL: def instances_A_x0 {
+// CHECK-NEXT:    list<A> instances = [a0, b0];
 // CHECK-NEXT:  }
 
-// CHECK-LABEL: def records_A_x1 {
-// CHECK-NEXT:    list<A> records = [a1, b1];
+// CHECK-LABEL: def instances_A_x1 {
+// CHECK-NEXT:    list<A> instances = [a1, b1];
 // CHECK-NEXT:  }
 
-// CHECK-LABEL: def records_B {
-// CHECK-NEXT:    list<B> records = [b0, b1];
+// CHECK-LABEL: def instances_B {
+// CHECK-NEXT:    list<B> instances = [b0, b1];
 // CHECK-NEXT:  }
 
 #ifdef ERROR1
-defvar error1 = !records<A>(123)
-// ERROR1: error: expected string type argument in !records operator
+defvar error1 = !instances<A>(123)
+// ERROR1: error: expected string type argument in !instances operator
 #endif
 
 #ifdef ERROR2
-defvar error2 = !records<1>("")
+defvar error2 = !instances<1>("")
 // ERROR2: error: Unknown token when expecting a type
 #endif
 
 #ifdef ERROR3
-defvar error3 = !records<A>("([)]")
+defvar error3 = !instances<A>("([)]")
 // ERROR3: error: invalid regex '([)]'
 #endif

>From 2862f5dfa0c8aaf112fec6736f3e0ff61aee9bc1 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 25 Mar 2025 14:34:44 +0800
Subject: [PATCH 3/5] Rename records.td to instances.td

---
 llvm/test/TableGen/instances.td | 134 ++++++++++++++++++++++++++++++++
 llvm/test/TableGen/records.td   |  60 --------------
 2 files changed, 134 insertions(+), 60 deletions(-)
 create mode 100644 llvm/test/TableGen/instances.td
 delete mode 100644 llvm/test/TableGen/records.td

diff --git a/llvm/test/TableGen/instances.td b/llvm/test/TableGen/instances.td
new file mode 100644
index 0000000000000..d84673c3590a3
--- /dev/null
+++ b/llvm/test/TableGen/instances.td
@@ -0,0 +1,134 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
+// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s
+// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s
+// XFAIL: vg_leak
+
+class A;
+def a0 : A;
+def a1 : A;
+
+class B : A;
+def b0 : B;
+def b1 : B;
+
+// CHECK-LABEL: def test0_instances_A {
+// CHECK-NEXT:    list<A> instances = [a0, a1, b0, b1];
+// CHECK-NEXT:  }
+def test0_instances_A {
+  list<A> instances = !instances<A>();
+}
+
+// CHECK-LABEL: def test1_instances_A_x0 {
+// CHECK-NEXT:    list<A> instances = [a0, b0];
+// CHECK-NEXT:  }
+def test1_instances_A_x0 {
+  list<A> instances = !instances<A>(".*0");
+}
+
+// CHECK-LABEL: def test2_instances_A_x1 {
+// CHECK-NEXT:    list<A> instances = [a1, b1];
+// CHECK-NEXT:  }
+def test2_instances_A_x1 {
+  list<A> instances = !instances<A>(".*1");
+}
+
+// CHECK-LABEL: def test3_instances_B {
+// CHECK-NEXT:    list<B> instances = [b0, b1];
+// CHECK-NEXT:  }
+def test3_instances_B {
+  list<B> instances = !instances<B>();
+}
+
+//-----------------------------------------------------------------------------//
+
+def a2 : A;
+def b2 : B;
+
+class ClassTest {
+   list<A> instances_A = !instances<A>();
+   list<B> instances_B = !instances<B>();
+}
+
+def a3 : A;
+def b3 : B;
+
+def test4_in_class_def : ClassTest;
+// CHECK-LABEL: def test4_in_class_def {
+// CHECK-NEXT:    list<A> instances_A = [a0, a1, a2, a3, b0, b1, b2, b3];
+// CHECK-NEXT:    list<B> instances_B = [b0, b1, b2, b3];
+// CHECK-NEXT:  }
+
+//-----------------------------------------------------------------------------//
+// Self-recurrence is not supported, so the record itself is not included.
+
+// CHECK-LABEL: def test5_self_recurrence {
+// CHECK-NEXT:    list<A> instances_A = [a0, a1, a2, a3, b0, b1, b2, b3];
+// CHECK-NEXT:  }
+def test5_self_recurrence : A {
+   list<A> instances_A = !instances<A>();
+}
+
+//-----------------------------------------------------------------------------//
+// Test these in multiclasses/loops.
+
+class C {
+  list<C> instances_C = !instances<C>();
+}
+
+multiclass MultiClassTest {
+  foreach i = 0-2 in {
+    def "c"#i : C;
+  }
+}
+
+// CHECK-LABEL: def test6_in_multiclass_def_c0 {
+// CHECK-NEXT:    list<C> instances_C = [];
+// CHECK-NEXT:  }
+// CHECK-LABEL: def test6_in_multiclass_def_c1 {
+// CHECK-NEXT:    list<C> instances_C = [test6_in_multiclass_def_c0];
+// CHECK-NEXT:  }
+// CHECK-LABEL: def test6_in_multiclass_def_c2 {
+// CHECK-NEXT:    list<C> instances_C = [test6_in_multiclass_def_c0, test6_in_multiclass_def_c1];
+// CHECK-NEXT:  }
+defm test6_in_multiclass_def_ : MultiClassTest;
+
+//-----------------------------------------------------------------------------//
+// Default argument will be considered as well.
+
+class TestArgument<B b = B<>> {
+  list<B> instances_B = !instances<B>();
+}
+
+// CHECK-LABEL: def test7_default_arg {
+// CHECK-NEXT:    list<B> instances_B = [anonymous_0, b0, b1, b2, b3];
+// CHECK-NEXT:  }
+def test7_default_arg : TestArgument;
+
+// Temporary actual parameter won't be considered.
+
+// CHECK-LABEL: def test8_anonymous0_arg {
+// CHECK-NEXT:    list<B> instances_B = [anonymous_0, b0, b1, b2, b3];
+// CHECK-NEXT:  }
+// CHECK-LABEL: def test8_anonymous1_arg {
+// CHECK-NEXT:    list<B> instances_B = [anonymous_0, b0, b1, b2, b3];
+// CHECK-NEXT:  }
+def test8_anonymous0_arg : TestArgument<B<>>;
+def test8_anonymous1_arg : TestArgument<B<>>;
+
+//-----------------------------------------------------------------------------//
+
+#ifdef ERROR1
+defvar error1 = !instances<A>(123);
+// ERROR1: error: expected string type argument in !instances operator
+#endif
+
+#ifdef ERROR2
+defvar error2 = !instances<1>("");
+// ERROR2: error: Unknown token when expecting a type
+#endif
+
+#ifdef ERROR3
+defvar error3 = !instances<A>("([)]");
+// ERROR3: error: invalid regex '([)]'
+#endif
diff --git a/llvm/test/TableGen/records.td b/llvm/test/TableGen/records.td
deleted file mode 100644
index 6c6a359130aee..0000000000000
--- a/llvm/test/TableGen/records.td
+++ /dev/null
@@ -1,60 +0,0 @@
-// RUN: llvm-tblgen %s | FileCheck %s
-// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
-// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s
-// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s
-// XFAIL: vg_leak
-
-class A;
-def a0 : A;
-def a1 : A;
-
-class B : A;
-def b0 : B;
-def b1 : B;
-
-def instances_A {
-  list<A> instances = !instances<A>();
-}
-
-def instances_A_x0 {
-  list<A> instances = !instances<A>(".*0");
-}
-
-def instances_A_x1 {
-  list<A> instances = !instances<A>(".*1");
-}
-
-def instances_B {
-  list<B> instances = !instances<B>();
-}
-
-// CHECK-LABEL: def instances_A {
-// CHECK-NEXT:    list<A> instances = [a0, a1, b0, b1];
-// CHECK-NEXT:  }
-
-// CHECK-LABEL: def instances_A_x0 {
-// CHECK-NEXT:    list<A> instances = [a0, b0];
-// CHECK-NEXT:  }
-
-// CHECK-LABEL: def instances_A_x1 {
-// CHECK-NEXT:    list<A> instances = [a1, b1];
-// CHECK-NEXT:  }
-
-// CHECK-LABEL: def instances_B {
-// CHECK-NEXT:    list<B> instances = [b0, b1];
-// CHECK-NEXT:  }
-
-#ifdef ERROR1
-defvar error1 = !instances<A>(123)
-// ERROR1: error: expected string type argument in !instances operator
-#endif
-
-#ifdef ERROR2
-defvar error2 = !instances<1>("")
-// ERROR2: error: Unknown token when expecting a type
-#endif
-
-#ifdef ERROR3
-defvar error3 = !instances<A>("([)]")
-// ERROR3: error: invalid regex '([)]'
-#endif

>From 2fd7a1ca9b415fc7e358db1270d15e3d3f584b45 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 25 Mar 2025 18:10:13 +0800
Subject: [PATCH 4/5] Add more tests and clarify the visibility

---
 llvm/docs/TableGen/ProgRef.rst      | 3 +++
 llvm/include/llvm/TableGen/Record.h | 5 ++++-
 llvm/lib/TableGen/Record.cpp        | 9 ++++++---
 llvm/lib/TableGen/TGParser.cpp      | 2 +-
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index d68fe6dd95985..fe0564d2990df 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -1842,6 +1842,9 @@ and non-0 as true.
     will be included. The format of *regex* is ERE (Extended POSIX Regular
     Expressions).
 
+    If ``!instances`` is used in a class/multiclass/foreach, only those records
+    of *type* that have been instantiated will be considered.
+
 ``!interleave(``\ *list*\ ``,`` *delim*\ ``)``
     This operator concatenates the items in the *list*, interleaving the
     *delim* string between each pair, and produces the resulting string.
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 5fc8fb480e76d..d3771a3731673 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -1217,7 +1217,7 @@ class InstancesOpInit final : public TypedInit, public FoldingSetNode {
 
   void Profile(FoldingSetNodeID &ID) const;
 
-  const Init *Fold() const;
+  const Init *Fold(const Record *CurRec, bool IsFinal = false) const;
 
   bool isComplete() const override { return false; }
 
@@ -2018,6 +2018,9 @@ class RecordKeeper {
     bool Ins = Defs.try_emplace(std::string(R->getName()), std::move(R)).second;
     (void)Ins;
     assert(Ins && "Record already exists");
+    // Clear cache
+    if (!Cache.empty())
+      Cache.clear();
   }
 
   void addExtraGlobal(StringRef Name, const Init *I) {
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 7dab201c7c500..d182b647aa931 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -2249,7 +2249,10 @@ void InstancesOpInit::Profile(FoldingSetNodeID &ID) const {
   ProfileInstancesOpInit(ID, Type, Regex);
 }
 
-const Init *InstancesOpInit::Fold() const {
+const Init *InstancesOpInit::Fold(const Record *CurRec, bool IsFinal) const {
+  if (CurRec && !IsFinal)
+    return this;
+
   const auto *RegexInit = dyn_cast<StringInit>(Regex);
   if (!RegexInit)
     return this;
@@ -2270,8 +2273,8 @@ const Init *InstancesOpInit::Fold() const {
 
 const Init *InstancesOpInit::resolveReferences(Resolver &R) const {
   const Init *NewRegex = Regex->resolveReferences(R);
-  if (Regex != NewRegex)
-    return get(Type, NewRegex)->Fold();
+  if (Regex != NewRegex || R.isFinal())
+    return get(Type, NewRegex)->Fold(R.getCurrentRecord(), R.isFinal());
   return this;
 }
 
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index c59875817e4ec..3d914b94ff62f 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -1495,7 +1495,7 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) {
       return nullptr;
     }
 
-    return InstancesOpInit::get(Type, Regex)->Fold();
+    return InstancesOpInit::get(Type, Regex)->Fold(CurRec);
   }
 
   case tgtok::XConcat:

>From 66f71ba052fd1df2411883dbb286fa95f19dbb2f Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 26 Mar 2025 12:37:49 +0800
Subject: [PATCH 5/5] Rework argument tests

---
 llvm/test/TableGen/instances.td | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/llvm/test/TableGen/instances.td b/llvm/test/TableGen/instances.td
index d84673c3590a3..a49b2de5f7520 100644
--- a/llvm/test/TableGen/instances.td
+++ b/llvm/test/TableGen/instances.td
@@ -94,27 +94,26 @@ multiclass MultiClassTest {
 defm test6_in_multiclass_def_ : MultiClassTest;
 
 //-----------------------------------------------------------------------------//
-// Default argument will be considered as well.
+// Default argument/temporary actual parameter will be considered as well.
+class D<int n>;
 
-class TestArgument<B b = B<>> {
-  list<B> instances_B = !instances<B>();
+class TestArgument<D d = D<0>> {
+  list<D> instances_D = !instances<D>();
 }
 
 // CHECK-LABEL: def test7_default_arg {
-// CHECK-NEXT:    list<B> instances_B = [anonymous_0, b0, b1, b2, b3];
+// CHECK-NEXT:    list<D> instances_D = [anonymous_0];
 // CHECK-NEXT:  }
 def test7_default_arg : TestArgument;
 
-// Temporary actual parameter won't be considered.
-
 // CHECK-LABEL: def test8_anonymous0_arg {
-// CHECK-NEXT:    list<B> instances_B = [anonymous_0, b0, b1, b2, b3];
+// CHECK-NEXT:    list<D> instances_D = [anonymous_0, anonymous_1];
 // CHECK-NEXT:  }
 // CHECK-LABEL: def test8_anonymous1_arg {
-// CHECK-NEXT:    list<B> instances_B = [anonymous_0, b0, b1, b2, b3];
+// CHECK-NEXT:    list<D> instances_D = [anonymous_0, anonymous_1, anonymous_2];
 // CHECK-NEXT:  }
-def test8_anonymous0_arg : TestArgument<B<>>;
-def test8_anonymous1_arg : TestArgument<B<>>;
+def test8_anonymous0_arg : TestArgument<D<1>>;
+def test8_anonymous1_arg : TestArgument<D<2>>;
 
 //-----------------------------------------------------------------------------//
 



More information about the llvm-commits mailing list