[lld] [ELF] ScriptLexer: generate tokens lazily (PR #100493)

Igor Kudrin via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 25 15:07:19 PDT 2024


================
@@ -123,45 +128,52 @@ void ScriptLexer::tokenize(MemoryBufferRef mb) {
     if (s.starts_with("\"")) {
       size_t e = s.find("\"", 1);
       if (e == StringRef::npos) {
-        StringRef filename = mb.getBufferIdentifier();
         size_t lineno = begin.substr(0, s.data() - begin.data()).count('\n');
-        error(filename + ":" + Twine(lineno + 1) + ": unclosed quote");
+        error(curBuf.filename + ":" + Twine(lineno + 1) + ": unclosed quote");
         return;
       }
 
-      vec.push_back(s.take_front(e + 1));
+      curTok = s.take_front(e + 1);
       s = s.substr(e + 1);
-      continue;
+      return;
     }
 
     // Some operators form separate tokens.
     if (s.starts_with("<<=") || s.starts_with(">>=")) {
-      vec.push_back(s.substr(0, 3));
+      curTok = s.substr(0, 3);
       s = s.substr(3);
-      continue;
+      return;
     }
-    if (s.size() > 1 && ((s[1] == '=' && strchr("*/+-<>&^|", s[0])) ||
+    if (s.size() > 1 && ((s[1] == '=' && strchr("*/+-!<>&^|", s[0])) ||
                          (s[0] == s[1] && strchr("<>&|", s[0])))) {
-      vec.push_back(s.substr(0, 2));
+      curTok = s.substr(0, 2);
       s = s.substr(2);
-      continue;
+      return;
     }
 
-    // Unquoted token. This is more relaxed than tokens in C-like language,
-    // so that you can write "file-name.cpp" as one bare token, for example.
-    size_t pos = s.find_first_not_of(
-        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
-        "0123456789_.$/\\~=+[]*?-!^:");
+    // Unquoted token. The non-expression token is more relaxed than tokens in
+    // C-like languages, so that you can write "file-name.cpp" as one bare
+    // token.
+    size_t pos;
+    if (inExpr) {
+      pos = s.find_first_not_of(
+          "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+          "0123456789_.$");
+      if (pos == 0 && s.size() >= 2 &&
+          is_contained({"==", "!=", "<=", ">=", "<<", ">>"}, s.substr(0, 2)))
----------------
igorkudrin wrote:

Haven't all of these tokens except `"=="` already been checked on lines 147-148?

https://github.com/llvm/llvm-project/pull/100493


More information about the llvm-commits mailing list