[lld] 65a15a5 - [ELF] Respect orders of symbol assignments and DEFINED (#65866)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 11 10:55:06 PDT 2023


Author: Fangrui Song
Date: 2023-09-11T10:54:49-07:00
New Revision: 65a15a56d5ca0d26ca6d34c31a617f5b26e3cfee

URL: https://github.com/llvm/llvm-project/commit/65a15a56d5ca0d26ca6d34c31a617f5b26e3cfee
DIFF: https://github.com/llvm/llvm-project/commit/65a15a56d5ca0d26ca6d34c31a617f5b26e3cfee.diff

LOG: [ELF] Respect orders of symbol assignments and DEFINED (#65866)

Fix #64600: the currently implementation is minimal (see
https://reviews.llvm.org/D83758), and an assignment like
`__TEXT_REGION_ORIGIN__ = DEFINED(__TEXT_REGION_ORIGIN__) ? __TEXT_REGION_ORIGIN__ : 0;`
(used by avr-ld[1]) leads to a value of zero (default value in `declareSymbol`),
which is unexpected.

Assign orders to symbol assignments and references so that
for a script-defined symbol, the `DEFINED` results match users'
expectation. I am unclear about GNU ld's exact behavior, but this hopefully
matches its behavior in the majority of cases.

[1]: https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=ld/scripttempl/avr.sc

Added: 
    lld/test/ELF/linkerscript/defined.test

Modified: 
    lld/ELF/Config.h
    lld/ELF/Driver.cpp
    lld/ELF/LinkerScript.cpp
    lld/ELF/LinkerScript.h
    lld/ELF/ScriptParser.cpp

Removed: 
    lld/test/ELF/linkerscript/define.test


################################################################################
diff  --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 679a39805ab4311..3e93a0e654dcac8 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -476,6 +476,12 @@ struct Ctx {
   // True if we need to reserve two .got entries for local-dynamic TLS model.
   std::atomic<bool> needsTlsLd{false};
 
+  // Each symbol assignment and DEFINED(sym) reference is assigned an increasing
+  // order. Each DEFINED(sym) evaluation checks whether the reference happens
+  // before a possible `sym = expr;`.
+  unsigned scriptSymOrderCounter = 1;
+  llvm::DenseMap<const Symbol *, unsigned> scriptSymOrder;
+
   void reset();
 
   llvm::raw_fd_ostream openAuxiliaryFile(llvm::StringRef, std::error_code &);

diff  --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 0975a6540099f70..0b24354b3b8df47 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -106,6 +106,8 @@ void Ctx::reset() {
   backwardReferences.clear();
   hasSympart.store(false, std::memory_order_relaxed);
   needsTlsLd.store(false, std::memory_order_relaxed);
+  scriptSymOrderCounter = 1;
+  scriptSymOrder.clear();
 }
 
 llvm::raw_fd_ostream Ctx::openAuxiliaryFile(llvm::StringRef filename,

diff  --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 28e9f0461b2d609..2b9f28ce68d685d 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -247,8 +247,13 @@ static void declareSymbol(SymbolAssignment *cmd) {
   Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, 0, 0,
                  nullptr);
 
-  // We can't calculate final value right now.
+  // If the symbol is already defined, its order is 0 (with absence indicating
+  // 0); otherwise it's assigned the order of the SymbolAssignment.
   Symbol *sym = symtab.insert(cmd->name);
+  if (!sym->isDefined())
+    ctx.scriptSymOrder.insert({sym, cmd->symOrder});
+
+  // We can't calculate final value right now.
   sym->mergeProperties(newSym);
   newSym.overwrite(*sym);
 

diff  --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 8b8320f9f18e0fd..f0a914e65993e2c 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -86,9 +86,9 @@ struct SectionCommand {
 
 // This represents ". = <expr>" or "<symbol> = <expr>".
 struct SymbolAssignment : SectionCommand {
-  SymbolAssignment(StringRef name, Expr e, std::string loc)
+  SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
       : SectionCommand(AssignmentKind), name(name), expression(e),
-        location(loc) {}
+        symOrder(symOrder), location(loc) {}
 
   static bool classof(const SectionCommand *c) {
     return c->kind == AssignmentKind;
@@ -105,6 +105,8 @@ struct SymbolAssignment : SectionCommand {
   bool provide = false;
   bool hidden = false;
 
+  unsigned symOrder;
+
   // Holds file name and line number for error reporting.
   std::string location;
 

diff  --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 3577e78c0d98ff3..1f8592fbb95deda 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -291,7 +291,7 @@ void ScriptParser::readDefsym(StringRef name) {
   Expr e = readExpr();
   if (!atEOF())
     setError("EOF expected, but got " + next());
-  SymbolAssignment *cmd = make<SymbolAssignment>(name, e, getCurrentLocation());
+  auto *cmd = make<SymbolAssignment>(name, e, 0, getCurrentLocation());
   script->sectionCommands.push_back(cmd);
 }
 
@@ -568,7 +568,7 @@ SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() {
       max = std::max(max, cast<OutputDesc>(cmd)->osec.size);
     return addrExpr().getValue() + max;
   };
-  v.push_back(make<SymbolAssignment>(".", moveDot, getCurrentLocation()));
+  v.push_back(make<SymbolAssignment>(".", moveDot, 0, getCurrentLocation()));
   return v;
 }
 
@@ -1047,7 +1047,7 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
 SymbolAssignment *ScriptParser::readAssignment(StringRef tok) {
   // Assert expression returns Dot, so this is equal to ".=."
   if (tok == "ASSERT")
-    return make<SymbolAssignment>(".", readAssert(), getCurrentLocation());
+    return make<SymbolAssignment>(".", readAssert(), 0, getCurrentLocation());
 
   size_t oldPos = pos;
   SymbolAssignment *cmd = nullptr;
@@ -1117,7 +1117,8 @@ SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) {
       }
     };
   }
-  return make<SymbolAssignment>(name, e, getCurrentLocation());
+  return make<SymbolAssignment>(name, e, ctx.scriptSymOrderCounter++,
+                                getCurrentLocation());
 }
 
 // This is an operator-precedence parser to parse a linker
@@ -1465,9 +1466,13 @@ Expr ScriptParser::readPrimary() {
   }
   if (tok == "DEFINED") {
     StringRef name = unquote(readParenLiteral());
+    // Return 1 if s is defined. If the definition is only found in a linker
+    // script, it must happen before this DEFINED.
+    auto order = ctx.scriptSymOrderCounter++;
     return [=] {
-      Symbol *b = symtab.find(name);
-      return (b && b->isDefined()) ? 1 : 0;
+      Symbol *s = symtab.find(name);
+      return s && s->isDefined() && ctx.scriptSymOrder.lookup(s) < order ? 1
+                                                                         : 0;
     };
   }
   if (tok == "LENGTH") {

diff  --git a/lld/test/ELF/linkerscript/define.test b/lld/test/ELF/linkerscript/define.test
deleted file mode 100644
index 3ecaa11cc5b69e1..000000000000000
--- a/lld/test/ELF/linkerscript/define.test
+++ /dev/null
@@ -1,19 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/define.s -o %t.o
-# RUN: ld.lld -o %t --script %s %t.o
-# RUN: llvm-objdump --section-headers %t | FileCheck %s
-
-EXTERN(extern_defined)
-SECTIONS {
-  . = DEFINED(defined) ? 0x11000 : .;
-  .foo : { *(.foo*) }
-  . = DEFINED(notdefined) ? 0x12000 : 0x13000;
-  .bar : { *(.bar*) }
-  . = DEFINED(extern_defined) ? 0x14000 : 0x15000;
-  .test : { *(.test*) }
-}
-
-# CHECK: 1 .foo  00000008 0000000000011000 DATA
-# CHECK: 2 .bar  00000008 0000000000013000 DATA
-# CHECK: 3 .test  00000008 0000000000015000 DATA
-# CHECK: 4 .text 00000000 0000000000015008 TEXT

diff  --git a/lld/test/ELF/linkerscript/defined.test b/lld/test/ELF/linkerscript/defined.test
new file mode 100644
index 000000000000000..1f21f0cda6ad45a
--- /dev/null
+++ b/lld/test/ELF/linkerscript/defined.test
@@ -0,0 +1,45 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/define.s -o %t.o
+# RUN: ld.lld -o %t --defsym vv=1 --script %s %t.o
+# RUN: llvm-readelf -S -s %t | FileCheck %s
+
+# CHECK:      [Nr] Name   Type     Address          Off    Size   ES Flg Lk Inf Al
+# CHECK-NEXT: [ 0]        NULL     0000000000000000 000000 000000 00      0   0  0
+# CHECK-NEXT: [ 1] .foo   PROGBITS 0000000000011000 001000 000008 00   A  0   0  1
+# CHECK-NEXT: [ 2] .bar   PROGBITS 0000000000013000 003000 000008 00   A  0   0  1
+# CHECK-NEXT: [ 3] .test  PROGBITS 0000000000015000 005000 000008 00   A  0   0  1
+# CHECK-NEXT: [ 4] .text  PROGBITS 0000000000015008 005008 000000 00  AX  0   0  4
+
+# CHECK:         Value          Size Type    Bind   Vis       Ndx Name
+# CHECK-DAG:  0000000000000001     0 NOTYPE  GLOBAL DEFAULT   ABS vv
+# CHECK-DAG:  0000000000000009     0 NOTYPE  GLOBAL DEFAULT   ABS ww
+# CHECK-DAG:  0000000000000001     0 NOTYPE  GLOBAL DEFAULT   ABS x1
+# CHECK-DAG:  0000000000000002     0 NOTYPE  GLOBAL DEFAULT   ABS x2
+# CHECK-DAG:  0000000000000001     0 NOTYPE  GLOBAL DEFAULT   ABS y1
+# CHECK-DAG:  0000000000000002     0 NOTYPE  GLOBAL DEFAULT   ABS y2
+
+EXTERN(extern_defined)
+SECTIONS {
+  . = DEFINED(defined) ? 0x11000 : .;
+  .foo : { *(.foo*) }
+  . = DEFINED(notdefined) ? 0x12000 : 0x13000;
+  .bar : { *(.bar*) }
+  . = DEFINED(extern_defined) ? 0x14000 : 0x15000;
+
+## Take the value from --defsym.
+  vv = DEFINED(vv) ? vv : 9;
+## 9 as ww is undefined.
+  ww = DEFINED(ww) ? ww : 9;
+
+## 1 as xx is not yet defined.
+  x1 = DEFINED(xx) ? 2 : 1;
+  .test : {
+    xx = .;
+    *(.test*)
+  }
+  x2 = DEFINED(xx) ? 2 : 1;
+
+  y1 = DEFINED(yy) ? 2 : 1;
+  yy = .;
+  y2 = DEFINED(yy) ? 2 : 1;
+}


        


More information about the llvm-commits mailing list