[lld] [ELF] Respect orders of symbol assignments and DEFINED (PR #65866)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 9 15:53:30 PDT 2023


https://github.com/MaskRay created https://github.com/llvm/llvm-project/pull/65866:

Fix #64600: the currently implementation is minimal (see
https://reviews.llvm.org/D83758), and an assignment like
`__TEXT_REGION_ORIGIN__ = DEFINED(__TEXT_REGION_ORIGIN__) ? __TEXT_REGION_ORIGIN__ : 0;`
(used by avr-ld) leads to a value of zero (default value in `declareSymbol`),
which is unexpected.

Assign orders to symbol assignments and references so that
for a script-defined symbol, the `DEFINED` results match users'
expectation. I am unclear about GNU ld's exact behavior, but this hopefully
matches its behavior in the majority of cases.


>From b249b87f81e4bc6e43ed80420e5b324c1e1660bb Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Sat, 9 Sep 2023 14:46:51 -0700
Subject: [PATCH] [ELF] Respect orders of symbol assignments and DEFINED

Fix #64600: the currently implementation is minimal (see
https://reviews.llvm.org/D83758), and an assignment like
`__TEXT_REGION_ORIGIN__ = DEFINED(__TEXT_REGION_ORIGIN__) ? __TEXT_REGION_ORIGIN__ : 0;`
(used by avr-ld) leads to a value of zero (default value in `declareSymbol`),
which is unexpected.

Assign orders to symbol assignments and references so that
for a script-defined symbol, the `DEFINED` results match users'
expectation. I am unclear about GNU ld's exact behavior, but this hopefully
matches its behavior in the majority of cases.
---
 lld/ELF/Config.h                      |  6 ++++
 lld/ELF/LinkerScript.cpp              |  7 ++++-
 lld/ELF/LinkerScript.h                |  7 +++--
 lld/ELF/ScriptParser.cpp              | 11 +++++--
 lld/test/ELF/linkerscript/define.test | 42 ++++++++++++++++++++++-----
 5 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 679a39805ab4311..58741a4d4b6d6eb 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -476,6 +476,12 @@ struct Ctx {
   // True if we need to reserve two .got entries for local-dynamic TLS model.
   std::atomic<bool> needsTlsLd{false};
 
+  // Each symbol assignment and reference is assigned an increasing order.
+  // Each DEFINED(sym) evaluation checks whether the use happens before a
+  // possible `sym = expr;`.
+  unsigned scriptSymOrderCounter = 1;
+  llvm::DenseMap<const Symbol *, unsigned> scriptSymOrder;
+
   void reset();
 
   llvm::raw_fd_ostream openAuxiliaryFile(llvm::StringRef, std::error_code &);
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 28e9f0461b2d609..2b9f28ce68d685d 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -247,8 +247,13 @@ static void declareSymbol(SymbolAssignment *cmd) {
   Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, 0, 0,
                  nullptr);
 
-  // We can't calculate final value right now.
+  // If the symbol is already defined, its order is 0 (with absence indicating
+  // 0); otherwise it's assigned the order of the SymbolAssignment.
   Symbol *sym = symtab.insert(cmd->name);
+  if (!sym->isDefined())
+    ctx.scriptSymOrder.insert({sym, cmd->symOrder});
+
+  // We can't calculate final value right now.
   sym->mergeProperties(newSym);
   newSym.overwrite(*sym);
 
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 8b8320f9f18e0fd..2bc5272f8285155 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -86,9 +86,10 @@ struct SectionCommand {
 
 // This represents ". = <expr>" or "<symbol> = <expr>".
 struct SymbolAssignment : SectionCommand {
-  SymbolAssignment(StringRef name, Expr e, std::string loc)
+  SymbolAssignment(StringRef name, Expr e, std::string loc,
+                   unsigned symOrder = 0)
       : SectionCommand(AssignmentKind), name(name), expression(e),
-        location(loc) {}
+        symOrder(symOrder), location(loc) {}
 
   static bool classof(const SectionCommand *c) {
     return c->kind == AssignmentKind;
@@ -105,6 +106,8 @@ struct SymbolAssignment : SectionCommand {
   bool provide = false;
   bool hidden = false;
 
+  unsigned symOrder;
+
   // Holds file name and line number for error reporting.
   std::string location;
 
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 3577e78c0d98ff3..d06863e0c46a5c8 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -1117,7 +1117,8 @@ SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) {
       }
     };
   }
-  return make<SymbolAssignment>(name, e, getCurrentLocation());
+  return make<SymbolAssignment>(name, e, getCurrentLocation(),
+                                ctx.scriptSymOrderCounter++);
 }
 
 // This is an operator-precedence parser to parse a linker
@@ -1465,9 +1466,13 @@ Expr ScriptParser::readPrimary() {
   }
   if (tok == "DEFINED") {
     StringRef name = unquote(readParenLiteral());
+    // Return 1 if s is defined. If the definition is only found in a linker
+    // script, it must happen before this DEFINED.
+    auto order = ctx.scriptSymOrderCounter++;
     return [=] {
-      Symbol *b = symtab.find(name);
-      return (b && b->isDefined()) ? 1 : 0;
+      Symbol *s = symtab.find(name);
+      return s && s->isDefined() && ctx.scriptSymOrder.lookup(s) < order ? 1
+                                                                         : 0;
     };
   }
   if (tok == "LENGTH") {
diff --git a/lld/test/ELF/linkerscript/define.test b/lld/test/ELF/linkerscript/define.test
index 3ecaa11cc5b69e1..89bb0bc28bdef80 100644
--- a/lld/test/ELF/linkerscript/define.test
+++ b/lld/test/ELF/linkerscript/define.test
@@ -1,7 +1,22 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/define.s -o %t.o
-# RUN: ld.lld -o %t --script %s %t.o
-# RUN: llvm-objdump --section-headers %t | FileCheck %s
+# RUN: ld.lld -o %t --defsym vv=1 --script %s %t.o
+# RUN: llvm-readelf -S -s %t | FileCheck %s
+
+# CHECK:      [Nr] Name   Type     Address          Off    Size   ES Flg Lk Inf Al
+# CHECK-NEXT: [ 0]        NULL     0000000000000000 000000 000000 00      0   0  0
+# CHECK-NEXT: [ 1] .foo   PROGBITS 0000000000011000 001000 000008 00   A  0   0  1
+# CHECK-NEXT: [ 2] .bar   PROGBITS 0000000000013000 003000 000008 00   A  0   0  1
+# CHECK-NEXT: [ 3] .test  PROGBITS 0000000000015000 005000 000008 00   A  0   0  1
+# CHECK-NEXT: [ 4] .text  PROGBITS 0000000000015008 005008 000000 00  AX  0   0  4
+
+# CHECK:         Value          Size Type    Bind   Vis       Ndx Name
+# CHECK-DAG:  0000000000000001     0 NOTYPE  GLOBAL DEFAULT   ABS vv
+# CHECK-DAG:  0000000000000009     0 NOTYPE  GLOBAL DEFAULT   ABS ww
+# CHECK-DAG:  0000000000000001     0 NOTYPE  GLOBAL DEFAULT   ABS x1
+# CHECK-DAG:  0000000000000002     0 NOTYPE  GLOBAL DEFAULT   ABS x2
+# CHECK-DAG:  0000000000000001     0 NOTYPE  GLOBAL DEFAULT   ABS y1
+# CHECK-DAG:  0000000000000002     0 NOTYPE  GLOBAL DEFAULT   ABS y2
 
 EXTERN(extern_defined)
 SECTIONS {
@@ -10,10 +25,21 @@ SECTIONS {
   . = DEFINED(notdefined) ? 0x12000 : 0x13000;
   .bar : { *(.bar*) }
   . = DEFINED(extern_defined) ? 0x14000 : 0x15000;
-  .test : { *(.test*) }
-}
 
-# CHECK: 1 .foo  00000008 0000000000011000 DATA
-# CHECK: 2 .bar  00000008 0000000000013000 DATA
-# CHECK: 3 .test  00000008 0000000000015000 DATA
-# CHECK: 4 .text 00000000 0000000000015008 TEXT
+## Take the value from --defsym.
+  vv = DEFINED(vv) ? vv : 9;
+## 9 as ww is undefined.
+  ww = DEFINED(ww) ? ww : 9;
+
+## 1 as xx is not yet defined.
+  x1 = DEFINED(xx) ? 2 : 1;
+  .test : {
+    xx = .;
+    *(.test*)
+  }
+  x2 = DEFINED(xx) ? 2 : 1;
+
+  y1 = DEFINED(yy) ? 2 : 1;
+  yy = .;
+  y2 = DEFINED(yy) ? 2 : 1;
+}



More information about the llvm-commits mailing list