[lld] [ELF] PHDRS update while condition and phdrs.s unclose2.lds output (PR #100918)
Hongyu Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 11:26:37 PDT 2024
https://github.com/yugier updated https://github.com/llvm/llvm-project/pull/100918
>From ae7e53bb951c96fdc9a37409d49495ee57758cad Mon Sep 17 00:00:00 2001
From: Hongyu Chen <hongyc4 at uci.edu>
Date: Sat, 27 Jul 2024 23:07:29 -0700
Subject: [PATCH 1/4] [ELF] PHDRS update while condition and phdrs.s
unclose2.lds output
---
lld/ELF/ScriptParser.cpp | 12 ++++++------
lld/test/ELF/linkerscript/phdrs.s | 2 +-
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index c033733877093..85143760f1540 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -524,17 +524,17 @@ void ScriptParser::readPhdrs() {
cmd.name = tok;
cmd.type = readPhdrType();
- while (!errorCount() && !consume(";")) {
- if (consume("FILEHDR"))
+ while (auto tok2 = till(";")) {
+ if (tok2 == "FILEHDR")
cmd.hasFilehdr = true;
- else if (consume("PHDRS"))
+ else if (tok2 == "PHDRS")
cmd.hasPhdrs = true;
- else if (consume("AT"))
+ else if (tok2 == "AT")
cmd.lmaExpr = readParenExpr();
- else if (consume("FLAGS"))
+ else if (tok2 == "FLAGS")
cmd.flags = readParenExpr()().getValue();
else
- setError("unexpected header attribute: " + next());
+ setError("unexpected header attribute: " + tok2);
}
script->phdrsCommands.push_back(cmd);
diff --git a/lld/test/ELF/linkerscript/phdrs.s b/lld/test/ELF/linkerscript/phdrs.s
index 997f7e39972d2..e43688bcb62f5 100644
--- a/lld/test/ELF/linkerscript/phdrs.s
+++ b/lld/test/ELF/linkerscript/phdrs.s
@@ -108,7 +108,7 @@ PHDRS { text PT_LOAD ;
PHDRS { text PT_LOAD
# RUN: not ld.lld -T unclosed2.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED2 %s
-# UNCLOSED2: error: unclosed2.lds:1: unexpected header attribute:
+# UNCLOSED2: error: unclosed2.lds:1: unexpected EOF
#--- a.s
.global _start
>From 38160d4ef5a1fbd25a66fbe5c431fe1e9def6b14 Mon Sep 17 00:00:00 2001
From: Hongyu Chen <hongyc4 at uci.edu>
Date: Sun, 28 Jul 2024 23:04:51 -0700
Subject: [PATCH 2/4] [ELF] Added readNameTill and replaced some unquote(tok)
---
lld/ELF/ScriptLexer.cpp | 17 +++++++++++++++++
lld/ELF/ScriptLexer.h | 1 +
lld/ELF/ScriptParser.cpp | 23 +++++++++++------------
lld/test/ELF/linkerscript/phdrs.s | 6 ++++++
4 files changed, 35 insertions(+), 12 deletions(-)
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index 9d31b2b226517..20a76bb4b5dd2 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -231,6 +231,23 @@ ScriptLexer::Token ScriptLexer::till(StringRef tok) {
return {};
}
+ScriptLexer::Token ScriptLexer::readNameTill(StringRef tok) {
+ // this behaves like till but expects that token to be an
+ // identify of quoted string
+ StringRef str = next();
+ if (str.starts_with("\""))
+ str = str.substr(1, str.size() - 2);
+ if (str == tok)
+ return {};
+ if (str == "(" || str == ")" || str == "}")
+ setError(tok + " is missing before " + str);
+ if (!atEOF())
+ return {str};
+ prevTok = {};
+ setError("unexpected EOF");
+ return {};
+}
+
// Returns true if S encloses T.
static bool encloses(StringRef s, StringRef t) {
return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end();
diff --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h
index 0c12b984c9e1b..daee97e002b43 100644
--- a/lld/ELF/ScriptLexer.h
+++ b/lld/ELF/ScriptLexer.h
@@ -67,6 +67,7 @@ class ScriptLexer {
bool consume(StringRef tok);
void expect(StringRef expect);
Token till(StringRef tok);
+ Token readNameTill(StringRef tok);
std::string getCurrentLocation();
MemoryBufferRef getCurrentMB();
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 85143760f1540..7870688ce3db2 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -310,8 +310,8 @@ void ScriptParser::readDefsym(StringRef name) {
void ScriptParser::readNoCrossRefs(bool to) {
expect("(");
NoCrossRefCommand cmd{{}, to};
- while (auto tok = till(")"))
- cmd.outputSections.push_back(unquote(tok));
+ while (auto tok = readNameTill(")"))
+ cmd.outputSections.push_back(tok);
if (cmd.outputSections.size() < 2)
warn(getCurrentLocation() + ": ignored with fewer than 2 output sections");
else
@@ -371,8 +371,8 @@ void ScriptParser::readAsNeeded() {
expect("(");
bool orig = config->asNeeded;
config->asNeeded = true;
- while (auto tok = till(")"))
- addFile(unquote(tok));
+ while (auto tok = readNameTill(")"))
+ addFile(tok);
config->asNeeded = orig;
}
@@ -387,8 +387,8 @@ void ScriptParser::readEntry() {
void ScriptParser::readExtern() {
expect("(");
- while (auto tok = till(")"))
- config->undefined.push_back(unquote(tok));
+ while (auto tok = readNameTill(")"))
+ config->undefined.push_back(tok);
}
void ScriptParser::readGroup() {
@@ -420,11 +420,11 @@ void ScriptParser::readInclude() {
void ScriptParser::readInput() {
expect("(");
- while (auto tok = till(")")) {
+ while (auto tok = readNameTill(")")) {
if (tok == "AS_NEEDED")
readAsNeeded();
else
- addFile(unquote(tok));
+ addFile(tok);
}
}
@@ -524,7 +524,7 @@ void ScriptParser::readPhdrs() {
cmd.name = tok;
cmd.type = readPhdrType();
- while (auto tok2 = till(";")) {
+ while (auto tok2 = readNameTill(";")) {
if (tok2 == "FILEHDR")
cmd.hasFilehdr = true;
else if (tok2 == "PHDRS")
@@ -1391,9 +1391,8 @@ std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() {
uint64_t withFlags = 0;
uint64_t withoutFlags = 0;
expect("(");
- while (!errorCount()) {
- StringRef tok = readName();
- bool without = tok.consume_front("!");
+ while (auto tok = readNameTill(")")) {
+ bool without = tok.str.consume_front("!");
if (std::optional<uint64_t> flag = parseFlag(tok)) {
if (without)
withoutFlags |= *flag;
diff --git a/lld/test/ELF/linkerscript/phdrs.s b/lld/test/ELF/linkerscript/phdrs.s
index e43688bcb62f5..5559e55012a87 100644
--- a/lld/test/ELF/linkerscript/phdrs.s
+++ b/lld/test/ELF/linkerscript/phdrs.s
@@ -110,6 +110,12 @@ PHDRS { text PT_LOAD
# RUN: not ld.lld -T unclosed2.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED2 %s
# UNCLOSED2: error: unclosed2.lds:1: unexpected EOF
+#--- unclosed3.lds
+PHDRS {all PT_LOAD FILEHDR PHDRS }
+
+# RUN: not ld.lld -T unclosed3.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED3 %s
+# UNCLOSED3: error: unclosed3.lds:1: ; is missing before }
+
#--- a.s
.global _start
_start:
>From 1e940112aedef84b6cbce37da4d790cfeb265ac1 Mon Sep 17 00:00:00 2001
From: Hongyu Chen <hongyuchy at google.com>
Date: Tue, 30 Jul 2024 18:20:40 +0000
Subject: [PATCH 3/4] [ELF] Update readNameTill
---
lld/ELF/ScriptLexer.cpp | 7 +-
lld/ELF/ScriptToken.h | 177 ++++++++++++++++++++++++++++++
lld/test/ELF/linkerscript/phdrs.s | 2 +-
3 files changed, 183 insertions(+), 3 deletions(-)
create mode 100644 lld/ELF/ScriptToken.h
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index 20a76bb4b5dd2..f0f1d68b7896e 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -234,13 +234,16 @@ ScriptLexer::Token ScriptLexer::till(StringRef tok) {
ScriptLexer::Token ScriptLexer::readNameTill(StringRef tok) {
// this behaves like till but expects that token to be an
// identify of quoted string
+ assert(tok.size() > 0);
+ assert(tok[0] != '"');
+
StringRef str = next();
if (str.starts_with("\""))
str = str.substr(1, str.size() - 2);
if (str == tok)
return {};
- if (str == "(" || str == ")" || str == "}")
- setError(tok + " is missing before " + str);
+ if (str == ")" || str == "}")
+ setError("'" + tok + "' is missing before '" + str + "'");
if (!atEOF())
return {str};
prevTok = {};
diff --git a/lld/ELF/ScriptToken.h b/lld/ELF/ScriptToken.h
new file mode 100644
index 0000000000000..a9f2b16dccb39
--- /dev/null
+++ b/lld/ELF/ScriptToken.h
@@ -0,0 +1,177 @@
+//===- ScriptToken.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the token enum for the LinkerScript lexer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_SCRIPT_TOKENIZER_H
+#define LLD_ELF_SCRIPT_TOKENIZER_H
+
+namespace lld {
+namespace elf {
+enum class Tok {
+ Entry,
+
+ // Commands Files
+ Include,
+ Input,
+ Group,
+ Memory,
+ Output,
+ SearchDir,
+ Startup,
+
+ Insert,
+ After,
+
+ // Commands for object file formats
+ OutputFormat,
+ Target,
+
+ // Other linker script commands
+ Assert,
+ Constant,
+ Extern,
+ // FORCE_COMMON_ALLOCATION
+ // INHIBIT_COMMON_ALLOCATION
+ OutputArch,
+ Nocrossrefs,
+ NocrossrefsTo,
+
+ // Assignment
+ Provide,
+ Hidden,
+ ProvideHidden,
+
+ Sections,
+ Before,
+
+ // Input Section
+ ExcludeFile,
+ Keep,
+ InputSectionFlags,
+
+ // Read section
+ Overlay,
+ Noload,
+ Copy,
+ Info,
+
+ // Output Section
+ OverwriteSections,
+ Subalign,
+ OnlyIfRo,
+ OnlyIfRw,
+ Fill,
+ Sort,
+
+ // Builtin Functions
+ Absolute,
+ Addr,
+ Align,
+ Alignof,
+ // BLOCK, // synonym for ALIGN for compatibility with older linker script
+ DataSegmentAlign,
+ DataSegmentEnd,
+ DataSegmentRelroEnd,
+ Defined,
+ Length,
+ Loadaddr,
+
+ Log2ceil,
+ Max,
+ Min,
+ Origin,
+ SegmentStart,
+ // NEXT, // This function is closely related to ALIGN(exp); unless you use the
+ // MEMORY command to define discontinuous memory for the output file, the two
+ // functions are equivalent.
+ Sizeof,
+ SizeofHeaders,
+
+ // PHDRS Command
+ Filehdr,
+ Phdrs,
+ At,
+ Flags,
+
+ // Version Command
+ Version,
+
+ RegionAlias,
+ AsNeeded,
+ Constructors,
+
+ // Symbolic Constants
+ Maxpagesize,
+ Commonpagesize,
+
+ Error,
+ Eof,
+
+ Identifier,
+ Hexdecimal, // 0x
+ HexdecimalH, // end with H/h
+ Decimal,
+ DecimalK, // end with K/k
+ DecimalM, // end with M/m
+
+ // Symbol tokens
+ LeftCurlyBracket, // {
+ RightCurlyBracket, // }
+ LeftParenthesis, // (
+ RightParenthesis, // )
+ Comma, // ,
+ Semicolon, // ;
+ Colon, // :
+ Asterisk, // *
+ Question, // ?
+ Excalamation, // !
+ Backslash, // "\"
+ Slash, // /
+ Percent, // %
+ Greater, // >
+ Less, // <
+ Minus, // -
+ Plus, // +
+ BitwiseAnd, // &
+ BitwiseXor, // ^
+ BitwiseOr, // |
+ Underscore, // _
+ Dot, // .
+ Quote, // Quoted token. Note that double-quote characters are parts of a token
+ // because, in a glob match context, only unquoted tokens are interpreted as
+ // glob patterns. Double-quoted tokens are literal patterns in that context.
+
+ // Assignment
+ Assign, // =
+ PlusAssign, // +=
+ MinusAssign, // -=
+ MulAssign, // *=
+ DivAssign, // /=
+ LeftShiftAssign, // <<=
+ RightShiftAssign, // >>=
+ AndAssign, // &=
+ OrAssign, // |=
+ XorAssign, // ^=
+
+ // operator token
+ NotEqual, // !=
+ Equal, // ==
+ GreaterEqual, // >=
+ LessEqual, // <=
+ LeftShift, // <<
+ RightShift, // >>
+ LogicalAnd, // &&
+ LogicalOr // ||
+};
+} // namespace elf
+} // namespace lld
+
+#endif // LLD_ELF_SCRIPT_TOKENIZER_H
diff --git a/lld/test/ELF/linkerscript/phdrs.s b/lld/test/ELF/linkerscript/phdrs.s
index 5559e55012a87..f705368cd77ad 100644
--- a/lld/test/ELF/linkerscript/phdrs.s
+++ b/lld/test/ELF/linkerscript/phdrs.s
@@ -114,7 +114,7 @@ PHDRS { text PT_LOAD
PHDRS {all PT_LOAD FILEHDR PHDRS }
# RUN: not ld.lld -T unclosed3.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED3 %s
-# UNCLOSED3: error: unclosed3.lds:1: ; is missing before }
+# UNCLOSED3: error: unclosed3.lds:1: ';' is missing before '}'
#--- a.s
.global _start
>From 5d6c4acad72f810e08b6c20c75cd8fcefcb356da Mon Sep 17 00:00:00 2001
From: Hongyu Chen <hongyuchy at google.com>
Date: Tue, 30 Jul 2024 18:26:05 +0000
Subject: [PATCH 4/4] [ELF] Fixed readNameTill return quoted string location
---
lld/ELF/ScriptLexer.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index f0f1d68b7896e..113e0f4be9265 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -238,8 +238,11 @@ ScriptLexer::Token ScriptLexer::readNameTill(StringRef tok) {
assert(tok[0] != '"');
StringRef str = next();
- if (str.starts_with("\""))
+ if (str.starts_with("\"")) {
str = str.substr(1, str.size() - 2);
+ if (str != tok)
+ return {str};
+ }
if (str == tok)
return {};
if (str == ")" || str == "}")
More information about the llvm-commits
mailing list