[llvm] 3a675c7 - [TableGen] Add the !substr() bang operator

Mon Dec 21 06:42:39 PST 2020

Author: Paul C. Anagnostopoulos
Date: 2020-12-21T09:41:59-05:00
New Revision: 3a675c777dd5788e2313cb06fb27b01f8a2e7573

URL: https://github.com/llvm/llvm-project/commit/3a675c777dd5788e2313cb06fb27b01f8a2e7573
DIFF: https://github.com/llvm/llvm-project/commit/3a675c777dd5788e2313cb06fb27b01f8a2e7573.diff

LOG: [TableGen] Add the !substr() bang operator

Update the documentation and add a test.

Differential Revision: https://reviews.llvm.org/D93419

Added: 
    llvm/test/TableGen/substr.td

Modified: 
    llvm/docs/TableGen/ProgRef.rst
    llvm/include/llvm/TableGen/Record.h
    llvm/lib/TableGen/Record.cpp
    llvm/lib/TableGen/TGLexer.cpp
    llvm/lib/TableGen/TGLexer.h
    llvm/lib/TableGen/TGParser.cpp
    llvm/lib/TableGen/TGParser.h

Removed: 
    


################################################################################
diff  --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 342b91a0c437..f2ee7a7e549a 100644

--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -216,7 +216,8 @@ TableGen provides "bang operators" that have a wide variety of uses:
                : !interleave !isa         !le          !listconcat  !listsplat
                : !lt         !mul         !ne          !not         !or
                : !setdagop   !shl         !size        !sra         !srl
-               : !strconcat  !sub         !subst       !tail        !xor
+               : !strconcat  !sub         !subst       !substr      !tail
+               : !xor
 
 The ``!cond`` operator has a slightly different
 syntax compared to other bang operators, so it is defined separately:
@@ -1723,6 +1724,13 @@ and non-0 as true.
     record if the *target* record name equals the *value* record name; otherwise it
     produces the *value*.
 
+``!substr(``\ *string*\ ``,`` *start*\ [``,`` *length*]\ ``)``
+    This operator extracts a substring of the given *string*. The starting
+    position of the substring is specified by *start*, which can range
+    between 0 and the length of the string. The length of the substring
+    is specified by *length*; if not specified, the rest of the string is
+    extracted. The *start* and *length* arguments must be integers.
+
 ``!tail(``\ *a*\ ``)``
     This operator produces a new list with all the elements
     of the list *a* except for the zeroth one. (See also ``!head``.)

diff  --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 3010b4dad09a..a0c5b2778547 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -829,7 +829,7 @@ class BinOpInit : public OpInit, public FoldingSetNode {
 /// !op (X, Y, Z) - Combine two inits.
 class TernOpInit : public OpInit, public FoldingSetNode {
 public:
-  enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG };
+  enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR };
 
 private:
   Init *LHS, *MHS, *RHS;

diff  --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index cbdce04494f3..9c0464d4e1bf 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -1325,6 +1325,27 @@ Init *TernOpInit::Fold(Record *CurRec) const {
     }
     break;
   }
+
+  case SUBSTR: {
+    StringInit *LHSs = dyn_cast<StringInit>(LHS);
+    IntInit *MHSi = dyn_cast<IntInit>(MHS);
+    IntInit *RHSi = dyn_cast<IntInit>(RHS);
+    if (LHSs && MHSi && RHSi) {
+      int64_t StringSize = LHSs->getValue().size();
+      int64_t Start = MHSi->getValue();
+      int64_t Length = RHSi->getValue();
+      if (Start < 0 || Start > StringSize)
+        PrintError(CurRec->getLoc(),
+                   Twine("!substr start position is out of range 0...") +
+                       std::to_string(StringSize) + ": " +
+                       std::to_string(Start));
+      if (Length < 0)
+        PrintError(CurRec->getLoc(), "!substr length must be nonnegative");
+      return StringInit::get(LHSs->getValue().substr(Start, Length),
+                             LHSs->getFormat());
+    }
+    break;
+  }
   }
 
   return const_cast<TernOpInit *>(this);
@@ -1364,11 +1385,12 @@ std::string TernOpInit::getAsString() const {
   std::string Result;
   bool UnquotedLHS = false;
   switch (getOpcode()) {
-  case SUBST: Result = "!subst"; break;
-  case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
+  case DAG: Result = "!dag"; break;
   case FILTER: Result = "!filter"; UnquotedLHS = true; break;
+  case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
   case IF: Result = "!if"; break;
-  case DAG: Result = "!dag"; break;
+  case SUBST: Result = "!subst"; break;
+  case SUBSTR: Result = "!substr"; break;
   }
   return (Result + "(" +
           (UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) +

diff  --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index df0df96f40eb..a45ef6dc10c1 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -589,6 +589,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
     .Case("listsplat", tgtok::XListSplat)
     .Case("strconcat", tgtok::XStrConcat)
     .Case("interleave", tgtok::XInterleave)
+    .Case("substr", tgtok::XSubstr)
     .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
     .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
     .Default(tgtok::Error);

diff  --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 1856bef3ea9b..ee568849ca88 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -53,9 +53,9 @@ namespace tgtok {
 
     // Bang operators.
     XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL,
-    XListConcat, XListSplat, XStrConcat, XInterleave, XCast, XSubst, XForEach,
-    XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA,
-    XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
+    XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast,
+    XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
+    XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
 
     // Boolean literals.
     TrueVal, FalseVal,

diff  --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 2671d29a7272..7308cf1b4924 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -1496,6 +1496,10 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
     return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
   }
 
+  case tgtok::XSubstr: {
+    return ParseOperationSubstr(CurRec, ItemType);
+  }
+
   case tgtok::XCond:
     return ParseOperationCond(CurRec, ItemType);
 
@@ -1655,6 +1659,94 @@ RecTy *TGParser::ParseOperatorType() {
   return Type;
 }
 
+/// Parse the !substr operation. Return null on a syntax or operand type error.
+/// When the length is omitted, the rest of the string is extracted.
+///
+/// Substr ::= !substr(string, start-int [, length-int]) => string
+Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
+  TernOpInit::TernaryOp Code = TernOpInit::SUBSTR;
+  RecTy *Type = StringRecTy::get();
+
+  Lex.Lex(); // eat the operation
+
+  if (!consume(tgtok::l_paren)) {
+    TokError("expected '(' after !substr operator");
+    return nullptr;
+  }
+
+  Init *LHS = ParseValue(CurRec);
+  if (!LHS)
+    return nullptr;
+
+  if (!consume(tgtok::comma)) {
+    TokError("expected ',' in !substr operator");
+    return nullptr;
+  }
+
+  SMLoc MHSLoc = Lex.getLoc();
+  Init *MHS = ParseValue(CurRec);
+  if (!MHS)
+    return nullptr;
+
+  SMLoc RHSLoc = Lex.getLoc();
+  Init *RHS;
+  if (consume(tgtok::comma)) {
+    RHSLoc = Lex.getLoc();
+    RHS = ParseValue(CurRec);
+    if (!RHS)
+      return nullptr;
+  } else {
+    // No length given: use the largest int64_t so the rest of the string is
+    // taken. SIZE_MAX becomes -1 with a 64-bit size_t, which Fold rejects.
+    RHS = IntInit::get(INT64_MAX);
+  }
+
+  if (!consume(tgtok::r_paren)) {
+    TokError("expected ')' in !substr operator");
+    return nullptr;
+  }
+
+  if (ItemType && !Type->typeIsConvertibleTo(ItemType))
+    Error(RHSLoc, Twine("expected value of type '") +
+                  ItemType->getAsString() + "', got '" +
+                  Type->getAsString() + "'");
+
+  TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
+  if (!LHSt && !isa<UnsetInit>(LHS)) {
+    TokError("could not determine type of the string in !substr");
+    return nullptr;
+  }
+  if (LHSt && !isa<StringRecTy>(LHSt->getType())) {
+    TokError(Twine("expected string, got type '") +
+             LHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+  if (!MHSt && !isa<UnsetInit>(MHS)) {
+    TokError("could not determine type of the start position in !substr");
+    return nullptr;
+  }
+  if (MHSt && !isa<IntRecTy>(MHSt->getType())) {
+    Error(MHSLoc, Twine("expected int, got type '") +
+                      MHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
+  if (!RHSt && !isa<UnsetInit>(RHS)) {
+    TokError("could not determine type of the length in !substr");
+    return nullptr;
+  }
+  if (RHSt && !isa<IntRecTy>(RHSt->getType())) {
+    TokError(Twine("expected int, got type '") +
+             RHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
+}
+
 /// Parse the !foreach and !filter operations. Return null on error.
 ///
 /// ForEach ::= !foreach(ID, list-or-dag, expr) => list<expr type>
@@ -2206,7 +2298,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
   case tgtok::XFoldl:
   case tgtok::XForEach:
   case tgtok::XFilter:
-  case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+  case tgtok::XSubst:
+  case tgtok::XSubstr: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
     return ParseOperation(CurRec, ItemType);
   }
   }

diff  --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h
index bdeb4d35382b..3ed78a23067f 100644
--- a/llvm/lib/TableGen/TGParser.h
+++ b/llvm/lib/TableGen/TGParser.h
@@ -254,6 +254,7 @@ class TGParser {
                        TypedInit *FirstItem = nullptr);
   RecTy *ParseType();
   Init *ParseOperation(Record *CurRec, RecTy *ItemType);
+  Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType);
   Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType);
   Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
   RecTy *ParseOperatorType();

diff  --git a/llvm/test/TableGen/substr.td b/llvm/test/TableGen/substr.td
new file mode 100644
index 000000000000..5efe4ce69215
--- /dev/null
+++ b/llvm/test/TableGen/substr.td
@@ -0,0 +1,81 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
+
+defvar claim = "This is the end of the world!";
+
+// CHECK: def Rec1
+// CHECK:   fullNoLength = "This is the end of the world!";
+// CHECK:   fullLength = "This is the end of the world!";
+// CHECK:   thisIsTheEnd = "This is the end";
+// CHECK:   DoorsSong = "the end";
+// CHECK:   finalNoLength = "end of the world!";
+// CHECK:   finalLength = "end of the world!";
+
+def Rec1 {
+  string fullNoLength = !substr(claim, 0);
+  string fullLength = !substr(claim, 0, 999);
+  string thisIsTheEnd = !substr(claim, 0, 15);
+  string DoorsSong = !substr(claim, 8, 7);
+  string finalNoLength = !substr(claim, 12);
+  string finalLength = !substr(claim, 12, !sub(!size(claim), 12));
+}
+
+// CHECK: def Rec2 {
+// CHECK:   lastName = "Flintstone";
+
+def Rec2 {
+  string firstName = "Fred";
+  string name = firstName # " " # "Flintstone";
+  string lastName = !substr(name, !add(!size(firstName), 1));
+}
+
+// CHECK: def Rec3 {
+// CHECK:   test1 = "";
+// CHECK:   test2 = "";
+// CHECK:   test3 = "";
+// CHECK:   test4 = "h";
+// CHECK:   test5 = "hello";
+// CHECK:   test6 = "";
+
+def Rec3 {
+  string test1 = !substr("", 0, 0);
+  string test2 = !substr("", 0, 9);
+  string test3 = !substr("hello", 0, 0);
+  string test4 = !substr("hello", 0, 1);
+  string test5 = !substr("hello", 0, 99);
+  string test6 = !substr("hello", 5, 99);
+}
+
+// CHECK: def Rec4
+// CHECK:   message = "This is the end of the world!";
+// CHECK:   messagePrefix = "This is th...";
+// CHECK:   warning = "Bad message: 'This is th...'";
+
+class C<string msg> {
+  string message = msg;
+  string messagePrefix = !substr(message, 0, 10) # "...";
+}
+
+def Rec4 : C<claim> {
+  string warning = "Bad message: '" # messagePrefix # "'";
+}
+
+#ifdef ERROR1
+
+// ERROR1: expected string, got type 'int'
+// ERROR1: expected int, got type 'bits<3>'
+// ERROR1: expected int, got type 'string'
+// ERROR1: !substr start position is out of range 0...29: 30
+// ERROR1: !substr length must be nonnegative
+
+def Rec8 {
+  string claim1 = !substr(42, 0, 3);
+  string claim2 = !substr(claim, 0b101);
+  string claim3 = !substr(claim, 0, "oops");
+}
+
+def Rec9 {
+  string claim1 = !substr(claim, !add(!size(claim), 1));
+  string claim2 = !substr(claim, 0, -13);
+}
+#endif