[llvm-branch-commits] [llvm] e122a71 - [TableGen] Add the !substr() bang operator
Paul C. Anagnostopoulos via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 23 08:04:12 PST 2020
Author: Paul C. Anagnostopoulos
Date: 2020-12-23T10:59:33-05:00
New Revision: e122a71a0a284e669c970e80214c6b3082aa2534
URL: https://github.com/llvm/llvm-project/commit/e122a71a0a284e669c970e80214c6b3082aa2534
DIFF: https://github.com/llvm/llvm-project/commit/e122a71a0a284e669c970e80214c6b3082aa2534.diff
LOG: [TableGen] Add the !substr() bang operator
Update the documentation and add a test.
Build failed: Change SIZE_MAX to std::numeric_limits<int64_t>::max().
Differential Revision: https://reviews.llvm.org/D93419
Added:
llvm/test/TableGen/substr.td
Modified:
llvm/docs/TableGen/ProgRef.rst
llvm/include/llvm/TableGen/Record.h
llvm/lib/TableGen/Record.cpp
llvm/lib/TableGen/TGLexer.cpp
llvm/lib/TableGen/TGLexer.h
llvm/lib/TableGen/TGParser.cpp
llvm/lib/TableGen/TGParser.h
Removed:
################################################################################
diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 342b91a0c437..f2ee7a7e549a 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -216,7 +216,8 @@ TableGen provides "bang operators" that have a wide variety of uses:
: !interleave !isa !le !listconcat !listsplat
: !lt !mul !ne !not !or
: !setdagop !shl !size !sra !srl
- : !strconcat !sub !subst !tail !xor
+ : !strconcat !sub !subst !substr !tail
+ : !xor
The ``!cond`` operator has a slightly different
syntax compared to other bang operators, so it is defined separately:
@@ -1723,6 +1724,13 @@ and non-0 as true.
record if the *target* record name equals the *value* record name; otherwise it
produces the *value*.
+``!substr(``\ *string*\ ``,`` *start*\ [``,`` *length*]\ ``)``
+ This operator extracts a substring of the given *string*. The starting
+ position of the substring is specified by *start*, which can range from
+ 0 to the length of the string, inclusive. The *length* must be nonnegative;
+ if it is omitted or exceeds the remaining characters, the rest of the string
+ is extracted. The *start* and *length* arguments must be integers.
+
``!tail(``\ *a*\ ``)``
This operator produces a new list with all the elements
of the list *a* except for the zeroth one. (See also ``!head``.)
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 3010b4dad09a..a0c5b2778547 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -829,7 +829,7 @@ class BinOpInit : public OpInit, public FoldingSetNode {
/// !op (X, Y, Z) - Combine two inits.
class TernOpInit : public OpInit, public FoldingSetNode {
public:
- enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG };
+ enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR };
private:
Init *LHS, *MHS, *RHS;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index cbdce04494f3..9c0464d4e1bf 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -1325,6 +1325,27 @@ Init *TernOpInit::Fold(Record *CurRec) const {
}
break;
}
+
+ case SUBSTR: {
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ IntInit *MHSi = dyn_cast<IntInit>(MHS);
+ IntInit *RHSi = dyn_cast<IntInit>(RHS);
+ if (LHSs && MHSi && RHSi) {
+ int64_t StringSize = LHSs->getValue().size();
+ int64_t Start = MHSi->getValue();
+ int64_t Length = RHSi->getValue();
+ if (Start < 0 || Start > StringSize)
+ PrintError(CurRec->getLoc(),
+ Twine("!substr start position is out of range 0...") +
+ std::to_string(StringSize) + ": " +
+ std::to_string(Start));
+ if (Length < 0)
+ PrintError(CurRec->getLoc(), "!substr length must be nonnegative");
+ return StringInit::get(LHSs->getValue().substr(Start, Length),
+ LHSs->getFormat());
+ }
+ break;
+ }
}
return const_cast<TernOpInit *>(this);
@@ -1364,11 +1385,12 @@ std::string TernOpInit::getAsString() const {
std::string Result;
bool UnquotedLHS = false;
switch (getOpcode()) {
- case SUBST: Result = "!subst"; break;
- case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
+ case DAG: Result = "!dag"; break;
case FILTER: Result = "!filter"; UnquotedLHS = true; break;
+ case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
case IF: Result = "!if"; break;
- case DAG: Result = "!dag"; break;
+ case SUBST: Result = "!subst"; break;
+ case SUBSTR: Result = "!substr"; break;
}
return (Result + "(" +
(UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) +
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index df0df96f40eb..a45ef6dc10c1 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -589,6 +589,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("listsplat", tgtok::XListSplat)
.Case("strconcat", tgtok::XStrConcat)
.Case("interleave", tgtok::XInterleave)
+ .Case("substr", tgtok::XSubstr)
.Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
.Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
.Default(tgtok::Error);
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 1856bef3ea9b..ee568849ca88 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -53,9 +53,9 @@ namespace tgtok {
// Bang operators.
XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL,
- XListConcat, XListSplat, XStrConcat, XInterleave, XCast, XSubst, XForEach,
- XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA,
- XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
+ XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast,
+ XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
+ XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
// Boolean literals.
TrueVal, FalseVal,
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 2671d29a7272..ebb66ccffc29 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -25,6 +25,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <limits>
using namespace llvm;
@@ -1496,6 +1497,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
}
+ case tgtok::XSubstr:
+ return ParseOperationSubstr(CurRec, ItemType);
+
case tgtok::XCond:
return ParseOperationCond(CurRec, ItemType);
@@ -1655,6 +1659,94 @@ RecTy *TGParser::ParseOperatorType() {
return Type;
}
+/// Parse the !substr operation. Return null on error.
+///
+/// Substr ::= !substr(string, start-int [, length-int]) => string
+///
+/// \param CurRec   Record context, passed through to ParseValue() and Fold().
+/// \param ItemType Type expected by the caller, or null if there is no
+///                 expectation. The operator itself always yields a string.
+/// \returns the folded TernOpInit, or null after an error has been reported.
+///
+/// All operands and the closing ')' are consumed before any operand type is
+/// checked, so a type error leaves the lexer positioned after the operator.
+Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
+  TernOpInit::TernaryOp Code = TernOpInit::SUBSTR;
+  RecTy *Type = StringRecTy::get();
+
+  Lex.Lex(); // eat the operation
+
+  if (!consume(tgtok::l_paren)) {
+    TokError("expected '(' after !substr operator");
+    return nullptr;
+  }
+
+  // Remember where each operand starts so that type errors can point at the
+  // offending operand rather than at the token following the ')'.
+  SMLoc LHSLoc = Lex.getLoc();
+  Init *LHS = ParseValue(CurRec);
+  if (!LHS)
+    return nullptr;
+
+  if (!consume(tgtok::comma)) {
+    TokError("expected ',' in !substr operator");
+    return nullptr;
+  }
+
+  SMLoc MHSLoc = Lex.getLoc();
+  Init *MHS = ParseValue(CurRec);
+  if (!MHS)
+    return nullptr;
+
+  // The length operand is optional. When it is omitted, the largest int64_t
+  // value is used as the length.
+  SMLoc RHSLoc = Lex.getLoc();
+  Init *RHS;
+  if (consume(tgtok::comma)) {
+    RHSLoc = Lex.getLoc();
+    RHS = ParseValue(CurRec);
+    if (!RHS)
+      return nullptr;
+  } else {
+    RHS = IntInit::get(std::numeric_limits<int64_t>::max());
+  }
+
+  if (!consume(tgtok::r_paren)) {
+    TokError("expected ')' in !substr operator");
+    return nullptr;
+  }
+
+  // The result is a string; fail if the caller cannot accept one. Like every
+  // other diagnostic in this function, this one ends the parse of the
+  // operator with a null result.
+  if (ItemType && !Type->typeIsConvertibleTo(ItemType)) {
+    Error(RHSLoc, Twine("expected value of type '") +
+                      ItemType->getAsString() + "', got '" +
+                      Type->getAsString() + "'");
+    return nullptr;
+  }
+
+  // An operand is accepted when it is unset or is typed with the required
+  // type (string for the first operand, int for the other two).
+  TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
+  if (!LHSt && !isa<UnsetInit>(LHS)) {
+    TokError("could not determine type of the string in !substr");
+    return nullptr;
+  }
+  if (LHSt && !isa<StringRecTy>(LHSt->getType())) {
+    Error(LHSLoc, Twine("expected string, got type '") +
+                      LHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+  if (!MHSt && !isa<UnsetInit>(MHS)) {
+    TokError("could not determine type of the start position in !substr");
+    return nullptr;
+  }
+  if (MHSt && !isa<IntRecTy>(MHSt->getType())) {
+    Error(MHSLoc, Twine("expected int, got type '") +
+                      MHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  // RHS is never null here: it is either a successfully parsed value or the
+  // default length created above.
+  TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
+  if (!RHSt && !isa<UnsetInit>(RHS)) {
+    TokError("could not determine type of the length in !substr");
+    return nullptr;
+  }
+  if (RHSt && !isa<IntRecTy>(RHSt->getType())) {
+    Error(RHSLoc, Twine("expected int, got type '") +
+                      RHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
+}
+
+
/// Parse the !foreach and !filter operations. Return null on error.
///
/// ForEach ::= !foreach(ID, list-or-dag, expr) => list<expr type>
@@ -2206,7 +2298,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XFoldl:
case tgtok::XForEach:
case tgtok::XFilter:
- case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+ case tgtok::XSubst:
+ case tgtok::XSubstr: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
return ParseOperation(CurRec, ItemType);
}
}
diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h
index bdeb4d35382b..3ed78a23067f 100644
--- a/llvm/lib/TableGen/TGParser.h
+++ b/llvm/lib/TableGen/TGParser.h
@@ -254,6 +254,7 @@ class TGParser {
TypedInit *FirstItem = nullptr);
RecTy *ParseType();
Init *ParseOperation(Record *CurRec, RecTy *ItemType);
+ Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType);
Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType);
Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
RecTy *ParseOperatorType();
diff --git a/llvm/test/TableGen/substr.td b/llvm/test/TableGen/substr.td
new file mode 100644
index 000000000000..5efe4ce69215
--- /dev/null
+++ b/llvm/test/TableGen/substr.td
@@ -0,0 +1,81 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
+
+defvar claim = "This is the end of the world!";
+
+// CHECK: def Rec1
+// CHECK: fullNoLength = "This is the end of the world!";
+// CHECK: fullLength = "This is the end of the world!";
+// CHECK: thisIsTheEnd = "This is the end";
+// CHECK: DoorsSong = "the end";
+// CHECK: finalNoLength = "end of the world!";
+// CHECK: finalLength = "end of the world!";
+
+// Substrings of the 29-character 'claim' string, taken with and without an
+// explicit length. Each "NoLength" field is paired with a "Length" field
+// that is expected to produce the same value.
+def Rec1 {
+ // Start 0, length omitted: the whole string.
+ string fullNoLength = !substr(claim, 0);
+ // A length larger than the string is expected to yield the whole string.
+ string fullLength = !substr(claim, 0, 999);
+ string thisIsTheEnd = !substr(claim, 0, 15);
+ string DoorsSong = !substr(claim, 8, 7);
+ // Start 12, length omitted: everything from "end" onward.
+ string finalNoLength = !substr(claim, 12);
+ // Same substring, with the length computed as size minus start.
+ string finalLength = !substr(claim, 12, !sub(!size(claim), 12));
+}
+
+// CHECK: def Rec2 {
+// CHECK: lastName = "Flintstone";
+
+// Uses a computed start position that refers to another field of the
+// same record.
+def Rec2 {
+ string firstName = "Fred";
+ string name = firstName # " " # "Flintstone";
+ // Skip the first name and the single space that follows it.
+ string lastName = !substr(name, !add(!size(firstName), 1));
+}
+
+// CHECK: def Rec3 {
+// CHECK: test1 = "";
+// CHECK: test2 = "";
+// CHECK: test3 = "";
+// CHECK: test4 = "h";
+// CHECK: test5 = "hello";
+// CHECK: test6 = "";
+
+// Boundary cases: empty input, zero length, a length that runs past the end
+// of the string, and a start position equal to the string length.
+def Rec3 {
+ string test1 = !substr("", 0, 0);
+ string test2 = !substr("", 0, 9);
+ string test3 = !substr("hello", 0, 0);
+ string test4 = !substr("hello", 0, 1);
+ string test5 = !substr("hello", 0, 99);
+ // Start 5 is the length of "hello"; the expected result is empty.
+ string test6 = !substr("hello", 5, 99);
+}
+
+// CHECK: def Rec4
+// CHECK: message = "This is the end of the world!";
+// CHECK: messagePrefix = "This is th...";
+// CHECK: warning = "Bad message: 'This is th...'";
+
+// Applies !substr to a field whose value comes from the template argument,
+// then appends "..." to the 10-character prefix.
+class C<string msg> {
+ string message = msg;
+ string messagePrefix = !substr(message, 0, 10) # "...";
+}
+
+// Derives from C with 'claim' as the message and embeds the inherited
+// messagePrefix field in a longer string.
+def Rec4 : C<claim> {
+ string warning = "Bad message: '" # messagePrefix # "'";
+}
+
+#ifdef ERROR1
+
+// ERROR1: expected string, got type 'int'
+// ERROR1: expected int, got type 'bits<3>'
+// ERROR1: expected int, got type 'string'
+// ERROR1: !substr start position is out of range 0...29: 30
+// ERROR1: !substr length must be nonnegative
+
+// Operand type errors, one per field, in the order of the expected
+// diagnostics above: the string operand is an int, the start operand is a
+// bits<3> value, and the length operand is a string.
+def Rec8 {
+ string claim1 = !substr(42, 0, 3);
+ string claim2 = !substr(claim, 0b101);
+ string claim3 = !substr(claim, 0, "oops");
+}
+
+// Operand value errors: a start position one past the string length
+// (30 for the 29-character 'claim'), and a negative length.
+def Rec9 {
+ string claim1 = !substr(claim, !add(!size(claim), 1));
+ string claim2 = !substr(claim, 0, -13);
+}
+#endif
More information about the llvm-branch-commits
mailing list