[lld] 9328c20 - [ELF] Track line number precisely

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 27 14:46:45 PDT 2024


Author: Fangrui Song
Date: 2024-07-27T14:46:41-07:00
New Revision: 9328c20cc80c4fb8dc86c3141e1be1739e07b3fc

URL: https://github.com/llvm/llvm-project/commit/9328c20cc80c4fb8dc86c3141e1be1739e07b3fc
DIFF: https://github.com/llvm/llvm-project/commit/9328c20cc80c4fb8dc86c3141e1be1739e07b3fc.diff

LOG: [ELF] Track line number precisely

`getLineNumber` is both imprecise (when `INCLUDE` is used) and
inefficient (see https://reviews.llvm.org/D104137). Track line numbers
precisely now that we have the `struct Buffer` abstraction from #100493.

Added: 
    

Modified: 
    lld/ELF/ScriptLexer.cpp
    lld/ELF/ScriptLexer.h
    lld/test/ELF/linkerscript/overlay.test
    lld/test/ELF/linkerscript/sections.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index 51282dba53c83..1a4eadbead1ba 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -48,32 +48,6 @@ StringRef ScriptLexer::getLine() {
   return s.substr(0, s.find_first_of("\r\n"));
 }
 
-// Returns 1-based line number of the current token.
-size_t ScriptLexer::getLineNumber() {
-  if (prevTok.empty())
-    return 1;
-  StringRef s = getCurrentMB().getBuffer();
-  const size_t tokOffset = prevTok.data() - s.data();
-
-  // For the first token, or when going backwards, start from the beginning of
-  // the buffer. If this token is after the previous token, start from the
-  // previous token.
-  size_t line = 1;
-  size_t start = 0;
-  if (lastLineNumberOffset > 0 && tokOffset >= lastLineNumberOffset) {
-    start = lastLineNumberOffset;
-    line = lastLineNumber;
-  }
-
-  line += s.substr(start, tokOffset - start).count('\n');
-
-  // Store the line number of this token for reuse.
-  lastLineNumberOffset = tokOffset;
-  lastLineNumber = line;
-
-  return line;
-}
-
 // Returns 0-based column number of the current token.
 size_t ScriptLexer::getColumnNumber() {
   return prevTok.data() - getLine().data();
@@ -81,7 +55,7 @@ size_t ScriptLexer::getColumnNumber() {
 
 std::string ScriptLexer::getCurrentLocation() {
   std::string filename = std::string(getCurrentMB().getBufferIdentifier());
-  return (filename + ":" + Twine(getLineNumber())).str();
+  return (filename + ":" + Twine(prevTokLine)).str();
 }
 
 // We don't want to record cascading errors. Keep only the first one.
@@ -177,6 +151,7 @@ StringRef ScriptLexer::skipSpace(StringRef s) {
         setError("unclosed comment in a linker script");
         return "";
       }
+      curBuf.lineNumber += s.substr(0, e).count('\n');
       s = s.substr(e + 2);
       continue;
     }
@@ -184,13 +159,17 @@ StringRef ScriptLexer::skipSpace(StringRef s) {
       size_t e = s.find('\n', 1);
       if (e == StringRef::npos)
         e = s.size() - 1;
+      else
+        ++curBuf.lineNumber;
       s = s.substr(e + 1);
       continue;
     }
-    size_t size = s.size();
+    StringRef saved = s;
     s = s.ltrim();
-    if (s.size() == size)
+    auto len = saved.size() - s.size();
+    if (len == 0)
       return s;
+    curBuf.lineNumber += saved.substr(0, len).count('\n');
   }
 }
 
@@ -199,6 +178,10 @@ bool ScriptLexer::atEOF() { return eof || errorCount(); }
 
 StringRef ScriptLexer::next() {
   prevTok = peek();
+  // `prevTokLine` is not updated for EOF so that the line number in `setError`
+  // will be more useful.
+  if (prevTok.size())
+    prevTokLine = curBuf.lineNumber;
   return std::exchange(curTok, StringRef(curBuf.s.data(), 0));
 }
 

diff  --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h
index bc2b5fee618f7..c8efddcb65c0c 100644
--- a/lld/ELF/ScriptLexer.h
+++ b/lld/ELF/ScriptLexer.h
@@ -23,6 +23,7 @@ class ScriptLexer {
     // The remaining content to parse and the filename.
     StringRef s, filename;
     const char *begin = nullptr;
+    size_t lineNumber = 1;
     Buffer() = default;
     Buffer(MemoryBufferRef mb)
         : s(mb.getBuffer()), filename(mb.getBufferIdentifier()),
@@ -44,6 +45,7 @@ class ScriptLexer {
   // curTok holds the cached return value of peek() and is invalid when the
   // expression state changes.
   StringRef curTok;
+  size_t prevTokLine = 1;
   // The inExpr state when curTok is cached.
   bool curTokState = false;
   bool eof = false;
@@ -67,12 +69,8 @@ class ScriptLexer {
   std::vector<MemoryBufferRef> mbs;
   bool inExpr = false;
 
-  size_t lastLineNumber = 0;
-  size_t lastLineNumberOffset = 0;
-
 private:
   StringRef getLine();
-  size_t getLineNumber();
   size_t getColumnNumber();
 };
 

diff  --git a/lld/test/ELF/linkerscript/overlay.test b/lld/test/ELF/linkerscript/overlay.test
index 59b5b6c62aad3..731caea6a5d14 100644
--- a/lld/test/ELF/linkerscript/overlay.test
+++ b/lld/test/ELF/linkerscript/overlay.test
@@ -98,5 +98,5 @@ SECTIONS {
   OVERLAY 0x1000 : AT ( 0x2000 ) {
 
 # RUN: not ld.lld a.o -T unclosed.lds 2>&1 | FileCheck %s --check-prefix=UNCLOSED
-# UNCLOSED:     error: unclosed.lds:1: unexpected EOF
+# UNCLOSED:     error: unclosed.lds:2: unexpected EOF
 # UNCLOSED-NOT: {{.}}

diff  --git a/lld/test/ELF/linkerscript/sections.s b/lld/test/ELF/linkerscript/sections.s
index dbedd12a9129e..2a27d9e736db5 100644
--- a/lld/test/ELF/linkerscript/sections.s
+++ b/lld/test/ELF/linkerscript/sections.s
@@ -103,7 +103,7 @@ SECTIONS {
    .text : { *(.text) }
 
 # RUN: not ld.lld -T unclosed.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED %s
-#     UNCLOSED:error: unclosed.lds:1: unexpected EOF
+#     UNCLOSED:error: unclosed.lds:2: unexpected EOF
 # UNCLOSED-NOT:{{.}}
 
 #--- unclosed-out.lds


        


More information about the llvm-commits mailing list