[lld] 9328c20 - [ELF] Track line number precisely
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 27 14:46:45 PDT 2024
Author: Fangrui Song
Date: 2024-07-27T14:46:41-07:00
New Revision: 9328c20cc80c4fb8dc86c3141e1be1739e07b3fc
URL: https://github.com/llvm/llvm-project/commit/9328c20cc80c4fb8dc86c3141e1be1739e07b3fc
DIFF: https://github.com/llvm/llvm-project/commit/9328c20cc80c4fb8dc86c3141e1be1739e07b3fc.diff
LOG: [ELF] Track line number precisely
`getLineNumber` is both imprecise (when `INCLUDE` is used) and
inefficient (see https://reviews.llvm.org/D104137). Track the line number
precisely now that we have the `struct Buffer` abstraction from #100493.
Added:
Modified:
lld/ELF/ScriptLexer.cpp
lld/ELF/ScriptLexer.h
lld/test/ELF/linkerscript/overlay.test
lld/test/ELF/linkerscript/sections.s
Removed:
################################################################################
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index 51282dba53c83..1a4eadbead1ba 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -48,32 +48,6 @@ StringRef ScriptLexer::getLine() {
return s.substr(0, s.find_first_of("\r\n"));
}
-// Returns 1-based line number of the current token.
-size_t ScriptLexer::getLineNumber() {
- if (prevTok.empty())
- return 1;
- StringRef s = getCurrentMB().getBuffer();
- const size_t tokOffset = prevTok.data() - s.data();
-
- // For the first token, or when going backwards, start from the beginning of
- // the buffer. If this token is after the previous token, start from the
- // previous token.
- size_t line = 1;
- size_t start = 0;
- if (lastLineNumberOffset > 0 && tokOffset >= lastLineNumberOffset) {
- start = lastLineNumberOffset;
- line = lastLineNumber;
- }
-
- line += s.substr(start, tokOffset - start).count('\n');
-
- // Store the line number of this token for reuse.
- lastLineNumberOffset = tokOffset;
- lastLineNumber = line;
-
- return line;
-}
-
// Returns 0-based column number of the current token.
size_t ScriptLexer::getColumnNumber() {
return prevTok.data() - getLine().data();
@@ -81,7 +55,7 @@ size_t ScriptLexer::getColumnNumber() {
std::string ScriptLexer::getCurrentLocation() {
std::string filename = std::string(getCurrentMB().getBufferIdentifier());
- return (filename + ":" + Twine(getLineNumber())).str();
+ return (filename + ":" + Twine(prevTokLine)).str();
}
// We don't want to record cascading errors. Keep only the first one.
@@ -177,6 +151,7 @@ StringRef ScriptLexer::skipSpace(StringRef s) {
setError("unclosed comment in a linker script");
return "";
}
+ curBuf.lineNumber += s.substr(0, e).count('\n');
s = s.substr(e + 2);
continue;
}
@@ -184,13 +159,17 @@ StringRef ScriptLexer::skipSpace(StringRef s) {
size_t e = s.find('\n', 1);
if (e == StringRef::npos)
e = s.size() - 1;
+ else
+ ++curBuf.lineNumber;
s = s.substr(e + 1);
continue;
}
- size_t size = s.size();
+ StringRef saved = s;
s = s.ltrim();
- if (s.size() == size)
+ auto len = saved.size() - s.size();
+ if (len == 0)
return s;
+ curBuf.lineNumber += saved.substr(0, len).count('\n');
}
}
@@ -199,6 +178,10 @@ bool ScriptLexer::atEOF() { return eof || errorCount(); }
StringRef ScriptLexer::next() {
prevTok = peek();
+ // `prevTokLine` is not updated for EOF so that the line number in `setError`
+ // will be more useful.
+ if (prevTok.size())
+ prevTokLine = curBuf.lineNumber;
return std::exchange(curTok, StringRef(curBuf.s.data(), 0));
}
diff --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h
index bc2b5fee618f7..c8efddcb65c0c 100644
--- a/lld/ELF/ScriptLexer.h
+++ b/lld/ELF/ScriptLexer.h
@@ -23,6 +23,7 @@ class ScriptLexer {
// The remaining content to parse and the filename.
StringRef s, filename;
const char *begin = nullptr;
+ size_t lineNumber = 1;
Buffer() = default;
Buffer(MemoryBufferRef mb)
: s(mb.getBuffer()), filename(mb.getBufferIdentifier()),
@@ -44,6 +45,7 @@ class ScriptLexer {
// curTok holds the cached return value of peek() and is invalid when the
// expression state changes.
StringRef curTok;
+ size_t prevTokLine = 1;
// The inExpr state when curTok is cached.
bool curTokState = false;
bool eof = false;
@@ -67,12 +69,8 @@ class ScriptLexer {
std::vector<MemoryBufferRef> mbs;
bool inExpr = false;
- size_t lastLineNumber = 0;
- size_t lastLineNumberOffset = 0;
-
private:
StringRef getLine();
- size_t getLineNumber();
size_t getColumnNumber();
};
diff --git a/lld/test/ELF/linkerscript/overlay.test b/lld/test/ELF/linkerscript/overlay.test
index 59b5b6c62aad3..731caea6a5d14 100644
--- a/lld/test/ELF/linkerscript/overlay.test
+++ b/lld/test/ELF/linkerscript/overlay.test
@@ -98,5 +98,5 @@ SECTIONS {
OVERLAY 0x1000 : AT ( 0x2000 ) {
# RUN: not ld.lld a.o -T unclosed.lds 2>&1 | FileCheck %s --check-prefix=UNCLOSED
-# UNCLOSED: error: unclosed.lds:1: unexpected EOF
+# UNCLOSED: error: unclosed.lds:2: unexpected EOF
# UNCLOSED-NOT: {{.}}
diff --git a/lld/test/ELF/linkerscript/sections.s b/lld/test/ELF/linkerscript/sections.s
index dbedd12a9129e..2a27d9e736db5 100644
--- a/lld/test/ELF/linkerscript/sections.s
+++ b/lld/test/ELF/linkerscript/sections.s
@@ -103,7 +103,7 @@ SECTIONS {
.text : { *(.text) }
# RUN: not ld.lld -T unclosed.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED %s
-# UNCLOSED:error: unclosed.lds:1: unexpected EOF
+# UNCLOSED:error: unclosed.lds:2: unexpected EOF
# UNCLOSED-NOT:{{.}}
#--- unclosed-out.lds
More information about the llvm-commits mailing list