[Mlir-commits] [mlir] mlir/Presburger: contribute a free-standing parser (PR #94916)
Jay Foad
llvmlistbot at llvm.org
Thu Jun 27 01:26:21 PDT 2024
================
@@ -0,0 +1,161 @@
+//===- Lexer.cpp - Presburger Lexer Implementation ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lexer for the Presburger textual form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Lexer.h"
+#include "Token.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace mlir::presburger;
+
+/// Construct a lexer over the main file registered with `sourceMgr`, placing
+/// the cursor at the first character of that file's buffer.
+Lexer::Lexer(const llvm::SourceMgr &sourceMgr) : sourceMgr(sourceMgr) {
+  const unsigned mainFileID = sourceMgr.getMainFileID();
+  curBuffer = sourceMgr.getMemoryBuffer(mainFileID)->getBuffer();
+  curPtr = curBuffer.begin();
+}
+
+/// emitError - Emit an error message and return an Token::error token.
+Token Lexer::emitError(const char *loc, const llvm::Twine &message) {
+ sourceMgr.PrintMessage(SMLoc::getFromPointer(loc), llvm::SourceMgr::DK_Error,
+ message);
+ return formToken(Token::error, loc);
+}
+
+/// Lex and return the next token starting at the current cursor position.
+///
+/// Whitespace and nul characters embedded in the buffer are skipped. A nul
+/// located exactly at the end of the buffer yields Token::eof. A character
+/// that starts no known token is reported through emitError, which prints a
+/// diagnostic and returns a Token::error token.
+Token Lexer::lexToken() {
+  while (true) {
+    const char *tokStart = curPtr;
+
+    // Consume one character and dispatch on it.
+    switch (*curPtr++) {
+    default:
+      // Handle bare identifiers. Use llvm::isAlpha rather than the <cctype>
+      // isalpha: the latter is locale-dependent and has undefined behaviour
+      // when handed a negative `char` (any byte >= 0x80 where char is signed).
+      if (llvm::isAlpha(curPtr[-1]))
+        return lexBareIdentifierOrKeyword(tokStart);
+
+      // Unknown character, emit an error.
+      return emitError(tokStart, "unexpected character");
+
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      // Handle whitespace.
+      continue;
+
+    case '_':
+      // Handle bare identifiers.
+      return lexBareIdentifierOrKeyword(tokStart);
+
+    case 0:
+      // This may either be a nul character in the source file or may be the
+      // EOF marker that llvm::MemoryBuffer guarantees will be there.
+      if (curPtr - 1 == curBuffer.end())
+        return formToken(Token::eof, tokStart);
+      // An embedded nul: skip it and keep lexing.
+      continue;
+
+    case ':':
+      return formToken(Token::colon, tokStart);
+    case ',':
+      return formToken(Token::comma, tokStart);
+    case '(':
+      return formToken(Token::l_paren, tokStart);
+    case ')':
+      return formToken(Token::r_paren, tokStart);
+    case '{':
+      return formToken(Token::l_brace, tokStart);
+    case '}':
+      return formToken(Token::r_brace, tokStart);
+    case '[':
+      return formToken(Token::l_square, tokStart);
+    case ']':
+      return formToken(Token::r_square, tokStart);
+    case '<':
+      return formToken(Token::less, tokStart);
+    case '>':
+      return formToken(Token::greater, tokStart);
+    case '=':
+      return formToken(Token::equal, tokStart);
+    case '+':
+      return formToken(Token::plus, tokStart);
+    case '*':
+      return formToken(Token::star, tokStart);
+    case '-':
+      // "->" is a single arrow token; a lone '-' is a minus.
+      if (*curPtr == '>') {
+        ++curPtr;
+        return formToken(Token::arrow, tokStart);
+      }
+      return formToken(Token::minus, tokStart);
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      return lexNumber(tokStart);
+    }
+  }
+}
+
+/// Lex a bare identifier or keyword that starts with a letter.
+///
+/// bare-id ::= (letter|[_]) (letter|digit|[_$.])*
+/// integer-type ::= `[su]?i[1-9][0-9]*`
+///
+Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
+ // Match the rest of the identifier regex: [0-9a-zA-Z_.$]*
+ while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr == '_' ||
+ *curPtr == '$' || *curPtr == '.')
+ ++curPtr;
+
+ // Check to see if this identifier is a keyword.
+ StringRef spelling(tokStart, curPtr - tokStart);
+
+ auto isAllDigit = [](StringRef str) {
+ return llvm::all_of(str, llvm::isDigit);
+ };
+
+ // Check for i123, si456, ui789.
+ if ((spelling.size() > 1 && tokStart[0] == 'i' &&
+ isAllDigit(spelling.drop_front())) ||
+ ((spelling.size() > 2 && tokStart[1] == 'i' &&
+ (tokStart[0] == 's' || tokStart[0] == 'u')) &&
+ isAllDigit(spelling.drop_front(2))))
+ return Token(Token::inttype, spelling);
----------------
jayfoad wrote:
This will accept things like `i001`. Is that OK?
https://github.com/llvm/llvm-project/pull/94916
More information about the Mlir-commits
mailing list