[lld] [ELF] PHDRS update while condition and phdrs.s unclose2.lds output (PR #100918)

Hongyu Chen via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 30 11:26:37 PDT 2024


https://github.com/yugier updated https://github.com/llvm/llvm-project/pull/100918

>From ae7e53bb951c96fdc9a37409d49495ee57758cad Mon Sep 17 00:00:00 2001
From: Hongyu Chen <hongyc4 at uci.edu>
Date: Sat, 27 Jul 2024 23:07:29 -0700
Subject: [PATCH 1/4] [ELF] PHDRS update while condition and phdrs.s
 unclose2.lds output

---
 lld/ELF/ScriptParser.cpp          | 12 ++++++------
 lld/test/ELF/linkerscript/phdrs.s |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index c033733877093..85143760f1540 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -524,17 +524,17 @@ void ScriptParser::readPhdrs() {
     cmd.name = tok;
     cmd.type = readPhdrType();
 
-    while (!errorCount() && !consume(";")) {
-      if (consume("FILEHDR"))
+    while (auto tok2 = till(";")) {
+      if (tok2 == "FILEHDR")
         cmd.hasFilehdr = true;
-      else if (consume("PHDRS"))
+      else if (tok2 == "PHDRS")
         cmd.hasPhdrs = true;
-      else if (consume("AT"))
+      else if (tok2 == "AT")
         cmd.lmaExpr = readParenExpr();
-      else if (consume("FLAGS"))
+      else if (tok2 == "FLAGS")
         cmd.flags = readParenExpr()().getValue();
       else
-        setError("unexpected header attribute: " + next());
+        setError("unexpected header attribute: " + tok2);
     }
 
     script->phdrsCommands.push_back(cmd);
diff --git a/lld/test/ELF/linkerscript/phdrs.s b/lld/test/ELF/linkerscript/phdrs.s
index 997f7e39972d2..e43688bcb62f5 100644
--- a/lld/test/ELF/linkerscript/phdrs.s
+++ b/lld/test/ELF/linkerscript/phdrs.s
@@ -108,7 +108,7 @@ PHDRS { text PT_LOAD ;
 PHDRS { text PT_LOAD
 
 # RUN: not ld.lld -T unclosed2.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED2 %s
-# UNCLOSED2: error: unclosed2.lds:1: unexpected header attribute:
+# UNCLOSED2: error: unclosed2.lds:1: unexpected EOF
 
 #--- a.s
 .global _start

>From 38160d4ef5a1fbd25a66fbe5c431fe1e9def6b14 Mon Sep 17 00:00:00 2001
From: Hongyu Chen <hongyc4 at uci.edu>
Date: Sun, 28 Jul 2024 23:04:51 -0700
Subject: [PATCH 2/4] [ELF] Added readNameTill and replaced some unquote(tok)

---
 lld/ELF/ScriptLexer.cpp           | 17 +++++++++++++++++
 lld/ELF/ScriptLexer.h             |  1 +
 lld/ELF/ScriptParser.cpp          | 23 +++++++++++------------
 lld/test/ELF/linkerscript/phdrs.s |  6 ++++++
 4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index 9d31b2b226517..20a76bb4b5dd2 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -231,6 +231,23 @@ ScriptLexer::Token ScriptLexer::till(StringRef tok) {
   return {};
 }
 
+ScriptLexer::Token ScriptLexer::readNameTill(StringRef tok) {
+  // this behaves like till but expects that token to be an
+  // identify of quoted string
+  StringRef str = next();
+  if (str.starts_with("\""))
+    str = str.substr(1, str.size() - 2);
+  if (str == tok)
+    return {};
+  if (str == "(" || str == ")" || str == "}")
+    setError(tok + " is missing before " + str);
+  if (!atEOF())
+    return {str};
+  prevTok = {};
+  setError("unexpected EOF");
+  return {};
+}
+
 // Returns true if S encloses T.
 static bool encloses(StringRef s, StringRef t) {
   return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end();
diff --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h
index 0c12b984c9e1b..daee97e002b43 100644
--- a/lld/ELF/ScriptLexer.h
+++ b/lld/ELF/ScriptLexer.h
@@ -67,6 +67,7 @@ class ScriptLexer {
   bool consume(StringRef tok);
   void expect(StringRef expect);
   Token till(StringRef tok);
+  Token readNameTill(StringRef tok);
   std::string getCurrentLocation();
   MemoryBufferRef getCurrentMB();
 
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 85143760f1540..7870688ce3db2 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -310,8 +310,8 @@ void ScriptParser::readDefsym(StringRef name) {
 void ScriptParser::readNoCrossRefs(bool to) {
   expect("(");
   NoCrossRefCommand cmd{{}, to};
-  while (auto tok = till(")"))
-    cmd.outputSections.push_back(unquote(tok));
+  while (auto tok = readNameTill(")"))
+    cmd.outputSections.push_back(tok);
   if (cmd.outputSections.size() < 2)
     warn(getCurrentLocation() + ": ignored with fewer than 2 output sections");
   else
@@ -371,8 +371,8 @@ void ScriptParser::readAsNeeded() {
   expect("(");
   bool orig = config->asNeeded;
   config->asNeeded = true;
-  while (auto tok = till(")"))
-    addFile(unquote(tok));
+  while (auto tok = readNameTill(")"))
+    addFile(tok);
   config->asNeeded = orig;
 }
 
@@ -387,8 +387,8 @@ void ScriptParser::readEntry() {
 
 void ScriptParser::readExtern() {
   expect("(");
-  while (auto tok = till(")"))
-    config->undefined.push_back(unquote(tok));
+  while (auto tok = readNameTill(")"))
+    config->undefined.push_back(tok);
 }
 
 void ScriptParser::readGroup() {
@@ -420,11 +420,11 @@ void ScriptParser::readInclude() {
 
 void ScriptParser::readInput() {
   expect("(");
-  while (auto tok = till(")")) {
+  while (auto tok = readNameTill(")")) {
     if (tok == "AS_NEEDED")
       readAsNeeded();
     else
-      addFile(unquote(tok));
+      addFile(tok);
   }
 }
 
@@ -524,7 +524,7 @@ void ScriptParser::readPhdrs() {
     cmd.name = tok;
     cmd.type = readPhdrType();
 
-    while (auto tok2 = till(";")) {
+    while (auto tok2 = readNameTill(";")) {
       if (tok2 == "FILEHDR")
         cmd.hasFilehdr = true;
       else if (tok2 == "PHDRS")
@@ -1391,9 +1391,8 @@ std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() {
   uint64_t withFlags = 0;
   uint64_t withoutFlags = 0;
   expect("(");
-  while (!errorCount()) {
-    StringRef tok = readName();
-    bool without = tok.consume_front("!");
+  while (auto tok = readNameTill(")")) {
+    bool without = tok.str.consume_front("!");
     if (std::optional<uint64_t> flag = parseFlag(tok)) {
       if (without)
         withoutFlags |= *flag;
diff --git a/lld/test/ELF/linkerscript/phdrs.s b/lld/test/ELF/linkerscript/phdrs.s
index e43688bcb62f5..5559e55012a87 100644
--- a/lld/test/ELF/linkerscript/phdrs.s
+++ b/lld/test/ELF/linkerscript/phdrs.s
@@ -110,6 +110,12 @@ PHDRS { text PT_LOAD
 # RUN: not ld.lld -T unclosed2.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED2 %s
 # UNCLOSED2: error: unclosed2.lds:1: unexpected EOF
 
+#--- unclosed3.lds
+PHDRS {all PT_LOAD FILEHDR PHDRS }
+
+# RUN: not ld.lld -T unclosed3.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED3 %s
+# UNCLOSED3: error: unclosed3.lds:1: ; is missing before }
+
 #--- a.s
 .global _start
 _start:

>From 1e940112aedef84b6cbce37da4d790cfeb265ac1 Mon Sep 17 00:00:00 2001
From: Hongyu Chen <hongyuchy at google.com>
Date: Tue, 30 Jul 2024 18:20:40 +0000
Subject: [PATCH 3/4] [ELF] Update readNameTill

---
 lld/ELF/ScriptLexer.cpp           |   7 +-
 lld/ELF/ScriptToken.h             | 177 ++++++++++++++++++++++++++++++
 lld/test/ELF/linkerscript/phdrs.s |   2 +-
 3 files changed, 183 insertions(+), 3 deletions(-)
 create mode 100644 lld/ELF/ScriptToken.h

diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index 20a76bb4b5dd2..f0f1d68b7896e 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -234,13 +234,16 @@ ScriptLexer::Token ScriptLexer::till(StringRef tok) {
 ScriptLexer::Token ScriptLexer::readNameTill(StringRef tok) {
   // this behaves like till but expects that token to be an
   // identify of quoted string
+  assert(tok.size() > 0);
+  assert(tok[0] != '"');
+
   StringRef str = next();
   if (str.starts_with("\""))
     str = str.substr(1, str.size() - 2);
   if (str == tok)
     return {};
-  if (str == "(" || str == ")" || str == "}")
-    setError(tok + " is missing before " + str);
+  if (str == ")" || str == "}")
+    setError("'" + tok + "' is missing before '" + str + "'");
   if (!atEOF())
     return {str};
   prevTok = {};
diff --git a/lld/ELF/ScriptToken.h b/lld/ELF/ScriptToken.h
new file mode 100644
index 0000000000000..a9f2b16dccb39
--- /dev/null
+++ b/lld/ELF/ScriptToken.h
@@ -0,0 +1,177 @@
+//===- ScriptToken.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the token kind enum for the linker script lexer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_SCRIPT_TOKENIZER_H
+#define LLD_ELF_SCRIPT_TOKENIZER_H
+
+namespace lld {
+namespace elf {
+enum class Tok {
+  Entry,
+
+  // Commands Files
+  Include,
+  Input,
+  Group,
+  Memory,
+  Output,
+  SearchDir,
+  Startup,
+
+  Insert,
+  After,
+
+  // Commands for object file formats
+  OutputFormat,
+  Target,
+
+  // Other linker script commands
+  Assert,
+  Constant,
+  Extern,
+  // FORCE_COMMON_ALLOCATION
+  // INHIBIT_COMMON_ALLOCATION
+  OutputArch,
+  Nocrossrefs,
+  NocrossrefsTo,
+
+  // Assignment
+  Provide,
+  Hidden,
+  ProvideHidden,
+
+  Sections,
+  Before,
+
+  // Input Section
+  ExcludeFile,
+  Keep,
+  InputSectionFlags,
+
+  // Read section
+  Overlay,
+  Noload,
+  Copy,
+  Info,
+
+  // Output Section
+  OverwriteSections,
+  Subalign,
+  OnlyIfRo,
+  OnlyIfRw,
+  Fill,
+  Sort,
+
+  // Builtin Functions
+  Absolute,
+  Addr,
+  Align,
+  Alignof,
+  // BLOCK, // synonym for ALIGN for compatibility with older linker script
+  DataSegmentAlign,
+  DataSegmentEnd,
+  DataSegmentRelroEnd,
+  Defined,
+  Length,
+  Loadaddr,
+
+  Log2ceil,
+  Max,
+  Min,
+  Origin,
+  SegmentStart,
+  // NEXT, // This function is closely related to ALIGN(exp); unless you use the
+  // MEMORY command to define discontinuous memory for the output file, the two
+  // functions are equivalent.
+  Sizeof,
+  SizeofHeaders,
+
+  // PHDRS Command
+  Filehdr,
+  Phdrs,
+  At,
+  Flags,
+
+  // Version Command
+  Version,
+
+  RegionAlias,
+  AsNeeded,
+  Constructors,
+
+  // Symbolic Constants
+  Maxpagesize,
+  Commonpagesize,
+
+  Error,
+  Eof,
+
+  Identifier,
+  Hexdecimal,  // 0x
+  HexdecimalH, // end with H/h
+  Decimal,
+  DecimalK, // end with K/k
+  DecimalM, // end with M/m
+
+  // Symbol tokens
+  LeftCurlyBracket,  // {
+  RightCurlyBracket, // }
+  LeftParenthesis,   // (
+  RightParenthesis,  // )
+  Comma,             // ,
+  Semicolon,         // ;
+  Colon,             // :
+  Asterisk,          // *
+  Question,          // ?
+  Excalamation,      // !
+  Backslash,         // "\"
+  Slash,             // /
+  Percent,           // %
+  Greater,           // >
+  Less,              // <
+  Minus,             // -
+  Plus,              // +
+  BitwiseAnd,        // &
+  BitwiseXor,        // ^
+  BitwiseOr,         // |
+  Underscore,        // _
+  Dot,               // .
+  Quote, // Quoted token. Note that double-quote characters are parts of a token
+  // because, in a glob match context, only unquoted tokens are interpreted as
+  // glob patterns. Double-quoted tokens are literal patterns in that context.
+
+  // Assignment
+  Assign,           // =
+  PlusAssign,       // +=
+  MinusAssign,      // -=
+  MulAssign,        // *=
+  DivAssign,        // /=
+  LeftShiftAssign,  // <<=
+  RightShiftAssign, // >>=
+  AndAssign,        // &=
+  OrAssign,         // |=
+  XorAssign,        // ^=
+
+  // operator token
+  NotEqual,     // !=
+  Equal,        // ==
+  GreaterEqual, // >=
+  LessEqual,    // <=
+  LeftShift,    // <<
+  RightShift,   // >>
+  LogicalAnd,   // &&
+  LogicalOr     // ||
+};
+} // namespace elf
+} // namespace lld
+
+#endif // LLD_ELF_SCRIPT_TOKENIZER_H
diff --git a/lld/test/ELF/linkerscript/phdrs.s b/lld/test/ELF/linkerscript/phdrs.s
index 5559e55012a87..f705368cd77ad 100644
--- a/lld/test/ELF/linkerscript/phdrs.s
+++ b/lld/test/ELF/linkerscript/phdrs.s
@@ -114,7 +114,7 @@ PHDRS { text PT_LOAD
 PHDRS {all PT_LOAD FILEHDR PHDRS }
 
 # RUN: not ld.lld -T unclosed3.lds a.o 2>&1 | FileCheck --check-prefix=UNCLOSED3 %s
-# UNCLOSED3: error: unclosed3.lds:1: ; is missing before }
+# UNCLOSED3: error: unclosed3.lds:1: ';' is missing before '}'
 
 #--- a.s
 .global _start

>From 5d6c4acad72f810e08b6c20c75cd8fcefcb356da Mon Sep 17 00:00:00 2001
From: Hongyu Chen <hongyuchy at google.com>
Date: Tue, 30 Jul 2024 18:26:05 +0000
Subject: [PATCH 4/4] [ELF] Fixed readNameTill return quoted string location

---
 lld/ELF/ScriptLexer.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index f0f1d68b7896e..113e0f4be9265 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -238,8 +238,11 @@ ScriptLexer::Token ScriptLexer::readNameTill(StringRef tok) {
   assert(tok[0] != '"');
 
   StringRef str = next();
-  if (str.starts_with("\""))
+  if (str.starts_with("\"")) {
     str = str.substr(1, str.size() - 2);
+    if (str != tok)
+      return {str};
+  }
   if (str == tok)
     return {};
   if (str == ")" || str == "}")



More information about the llvm-commits mailing list