[lld] 4fc2557 - [WebAssembly][lld] Preassign table number 0 to indirect function table for MVP inputs

Andy Wingo via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 12 11:21:22 PST 2021


Author: Andy Wingo
Date: 2021-02-12T20:20:19+01:00
New Revision: 4fc25573089c53804a035080739867ee3b346fdd

URL: https://github.com/llvm/llvm-project/commit/4fc25573089c53804a035080739867ee3b346fdd
DIFF: https://github.com/llvm/llvm-project/commit/4fc25573089c53804a035080739867ee3b346fdd.diff

LOG: [WebAssembly][lld] Preassign table number 0 to indirect function table for MVP inputs

MVP object files may import at most one table, and if they do, it must
be assigned table number zero in the output, as the references to that
table are not relocatable.  Ensure that this is the case, even if some
inputs define other tables.

Differential Revision: https://reviews.llvm.org/D96001

Added: 
    lld/test/wasm/invalid-mvp-table-use.s

Modified: 
    lld/test/wasm/shared.ll
    lld/wasm/Config.h
    lld/wasm/Driver.cpp
    lld/wasm/InputFiles.cpp
    lld/wasm/InputFiles.h
    lld/wasm/Symbols.cpp
    lld/wasm/SyntheticSections.cpp
    lld/wasm/SyntheticSections.h
    lld/wasm/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/test/wasm/invalid-mvp-table-use.s b/lld/test/wasm/invalid-mvp-table-use.s
new file mode 100644
index 000000000000..b4f12a7eeb9a
--- /dev/null
+++ b/lld/test/wasm/invalid-mvp-table-use.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
+#
+# If any table is defined or declared besides the __indirect_function_table,
+# the compilation unit should be compiled with -mattr=+reference-types,
+# causing symbol table entries to be emitted for all tables.
+# RUN: not wasm-ld --no-entry %t.o -o %t.wasm 2>&1 | FileCheck -check-prefix=CHECK-ERR %s
+
+.global call_indirect
+call_indirect:
+        .functype call_indirect () -> ()
+	i32.const 1
+        call_indirect () -> ()
+        end_function
+
+.globl table
+table:
+        .tabletype table, externref
+
+# CHECK-ERR: expected one symbol table entry for each of the 2 table(s) present, but got 1 symbol(s) instead.

diff  --git a/lld/test/wasm/shared.ll b/lld/test/wasm/shared.ll
index 28ef9b6470d1..ecfc0e3062af 100644
--- a/lld/test/wasm/shared.ll
+++ b/lld/test/wasm/shared.ll
@@ -69,6 +69,14 @@ declare void @func_external()
 ; CHECK-NEXT:         Memory:
 ; CHECK-NEXT:           Initial:       0x1
 ; CHECK-NEXT:       - Module:          env
+; CHECK-NEXT:         Field:           __indirect_function_table
+; CHECK-NEXT:         Kind:            TABLE
+; CHECK-NEXT:         Table:
+; CHECK-NEXT:           Index:           0
+; CHECK-NEXT:           ElemType:        FUNCREF
+; CHECK-NEXT:           Limits:
+; CHECK-NEXT:             Initial:         0x2
+; CHECK-NEXT:       - Module:          env
 ; CHECK-NEXT:         Field:           __stack_pointer
 ; CHECK-NEXT:         Kind:            GLOBAL
 ; CHECK-NEXT:         GlobalType:      I32
@@ -87,14 +95,6 @@ declare void @func_external()
 ; CHECK-NEXT:         Field:           func_external
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         SigIndex:        1
-; CHECK-NEXT:       - Module:          env
-; CHECK-NEXT:         Field:           __indirect_function_table
-; CHECK-NEXT:         Kind:            TABLE
-; CHECK-NEXT:         Table:
-; CHECK-NEXT:           Index:           0
-; CHECK-NEXT:           ElemType:        FUNCREF
-; CHECK-NEXT:           Limits:
-; CHECK-NEXT:             Initial:         0x2
 ; CHECK-NEXT:       - Module:          GOT.mem
 ; CHECK-NEXT:         Field:           indirect_func
 ; CHECK-NEXT:         Kind:            GLOBAL

diff  --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index f18debfb1f83..b91c3d9637a7 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -83,6 +83,10 @@ struct Configuration {
   // True if we are creating position-independent code.
   bool isPic;
 
+  // True if we have an MVP input that uses __indirect_function_table and which
+  // requires it to be allocated to table number 0.
+  bool legacyFunctionTable = false;
+
   // The table offset at which to place function addresses.  We reserve zero
   // for the null function pointer.  This gets set to 1 for executables and 0
   // for shared libraries (since they always added to a dynamic offset at

diff  --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 3e2786d2b59d..a3bcfb715481 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -815,14 +815,14 @@ static TableSymbol *createUndefinedIndirectFunctionTable(StringRef name) {
 }
 
 static TableSymbol *resolveIndirectFunctionTable() {
-  Symbol *existingTable = symtab->find(functionTableName);
-  if (existingTable) {
-    if (!isa<TableSymbol>(existingTable)) {
+  Symbol *existing = symtab->find(functionTableName);
+  if (existing) {
+    if (!isa<TableSymbol>(existing)) {
       error(Twine("reserved symbol must be of type table: `") +
             functionTableName + "`");
       return nullptr;
     }
-    if (existingTable->isDefined()) {
+    if (existing->isDefined()) {
       error(Twine("reserved symbol must not be defined in input files: `") +
             functionTableName + "`");
       return nullptr;
@@ -830,12 +830,11 @@ static TableSymbol *resolveIndirectFunctionTable() {
   }
 
   if (config->importTable) {
-    if (existingTable)
-      return cast<TableSymbol>(existingTable);
+    if (existing)
+      return cast<TableSymbol>(existing);
     else
       return createUndefinedIndirectFunctionTable(functionTableName);
-  } else if ((existingTable && existingTable->isLive()) ||
-             config->exportTable) {
+  } else if ((existing && existing->isLive()) || config->exportTable) {
     // A defined table is required.  Either because the user request an exported
     // table or because the table symbol is already live.  The existing table is
     // guaranteed to be undefined due to the check above.

diff  --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index 5f6fb9bd3faa..684c1ddc403e 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -308,69 +308,109 @@ static void setRelocs(const std::vector<T *> &chunks,
   }
 }
 
-// Since LLVM 12, we expect that if an input file defines or uses a table, it
-// declares the tables using symbols and records each use with a relocation.
-// This way when the linker combines inputs, it can collate the tables used by
-// the inputs, assigning them distinct table numbers, and renumber all the uses
-// as appropriate.  At the same time, the linker has special logic to build the
+// An object file can have two approaches to tables.  With the reference-types
+// feature enabled, input files that define or use tables declare the tables
+// using symbols, and record each use with a relocation.  This way when the
+// linker combines inputs, it can collate the tables used by the inputs,
+// assigning them distinct table numbers, and renumber all the uses as
+// appropriate.  At the same time, the linker has special logic to build the
 // indirect function table if it is needed.
 //
-// However, object files produced by LLVM 11 and earlier neither write table
-// symbols nor record relocations, and yet still use tables via call_indirect,
-// and via function pointer bitcasts.  We can detect these object files, as they
-// declare tables as imports or define them locally, but don't have table
-// symbols.  synthesizeTableSymbols serves as a shim when loading these older
-// input files, defining the missing symbols to allow the indirect function
-// table to be built.
+// However, MVP object files (those that target WebAssembly 1.0, the "minimum
+// viable product" version of WebAssembly) neither write table symbols nor
+// record relocations.  These files can have at most one table, the indirect
+// function table used by call_indirect and which is the address space for
+// function pointers.  If this table is present, it is always an import.  If we
+// have a file with a table import but no table symbols, it is an MVP object
+// file.  synthesizeMVPIndirectFunctionTableSymbolIfNeeded serves as a shim when
+// loading these input files, defining the missing symbol to allow the indirect
+// function table to be built.
 //
-// Table uses in these older files won't be relocated, as they have no
-// relocations.  In practice this isn't a problem, as these object files
-// typically just declare a single table named __indirect_function_table and
-// having table number 0, so relocation would be idempotent anyway.
-void ObjFile::synthesizeTableSymbols() {
-  uint32_t tableNumber = 0;
-  const WasmGlobalType *globalType = nullptr;
-  const WasmEventType *eventType = nullptr;
-  const WasmSignature *signature = nullptr;
-  if (wasmObj->getNumImportedTables()) {
-    for (const auto &import : wasmObj->imports()) {
-      if (import.Kind == WASM_EXTERNAL_TABLE) {
-        auto *info = make<WasmSymbolInfo>();
-        info->Name = import.Field;
-        info->Kind = WASM_SYMBOL_TYPE_TABLE;
-        info->ImportModule = import.Module;
-        info->ImportName = import.Field;
-        info->Flags = WASM_SYMBOL_UNDEFINED;
-        info->Flags |= WASM_SYMBOL_NO_STRIP;
-        info->ElementIndex = tableNumber++;
-        LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: "
-                          << info->Name << "\n");
-        auto *wasmSym = make<WasmSymbol>(*info, globalType, &import.Table,
-                                         eventType, signature);
-        symbols.push_back(createUndefined(*wasmSym, false));
-        // Because there are no TABLE_NUMBER relocs in this case, we can't
-        // compute accurate liveness info; instead, just mark the symbol as
-        // always live.
-        symbols.back()->markLive();
-      }
+// As indirect function table table usage in MVP objects cannot be relocated,
+// the linker must ensure that this table gets assigned index zero.
+void ObjFile::addLegacyIndirectFunctionTableIfNeeded(
+    uint32_t tableSymbolCount) {
+  uint32_t tableCount = wasmObj->getNumImportedTables() + tables.size();
+
+  // If there are symbols for all tables, then all is good.
+  if (tableCount == tableSymbolCount)
+    return;
+
+  // It's possible for an input to define tables and also use the indirect
+  // function table, but forget to compile with -mattr=+reference-types.
+  // For these newer files, we require symbols for all tables, and
+  // relocations for all of their uses.
+  if (tableSymbolCount != 0) {
+    error(toString(this) +
+          ": expected one symbol table entry for each of the " +
+          Twine(tableCount) + " table(s) present, but got " +
+          Twine(tableSymbolCount) + " symbol(s) instead.");
+    return;
+  }
+
+  // An MVP object file can have up to one table import, for the indirect
+  // function table, but will have no table definitions.
+  if (tables.size()) {
+    error(toString(this) +
+          ": unexpected table definition(s) without corresponding "
+          "symbol-table entries.");
+    return;
+  }
+
+  // An MVP object file can have only one table import.
+  if (tableCount != 1) {
+    error(toString(this) +
+          ": multiple table imports, but no corresponding symbol-table "
+          "entries.");
+    return;
+  }
+
+  const WasmImport *tableImport = nullptr;
+  for (const auto &import : wasmObj->imports()) {
+    if (import.Kind == WASM_EXTERNAL_TABLE) {
+      assert(!tableImport);
+      tableImport = &import;
     }
   }
-  for (const auto &table : tables) {
-    auto *info = make<llvm::wasm::WasmSymbolInfo>();
-    // Empty name.
-    info->Kind = WASM_SYMBOL_TYPE_TABLE;
-    info->Flags = WASM_SYMBOL_BINDING_LOCAL;
-    info->Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN;
-    info->Flags |= WASM_SYMBOL_NO_STRIP;
-    info->ElementIndex = tableNumber++;
-    LLVM_DEBUG(dbgs() << "Synthesizing symbol for table definition: "
-                      << info->Name << "\n");
-    auto *wasmSym = make<WasmSymbol>(*info, globalType, &table->getType(),
-                                     eventType, signature);
-    symbols.push_back(createDefined(*wasmSym));
-    // Mark live, for the same reasons as for imported tables.
-    symbols.back()->markLive();
+  assert(tableImport);
+
+  // We can only synthesize a symtab entry for the indirect function table; if
+  // it has an unexpected name or type, assume that it's not actually the
+  // indirect function table.
+  if (tableImport->Field != functionTableName ||
+      tableImport->Table.ElemType != uint8_t(ValType::FUNCREF)) {
+    error(toString(this) + ": table import " + Twine(tableImport->Field) +
+          " is missing a symbol table entry.");
+    return;
   }
+
+  auto *info = make<WasmSymbolInfo>();
+  info->Name = tableImport->Field;
+  info->Kind = WASM_SYMBOL_TYPE_TABLE;
+  info->ImportModule = tableImport->Module;
+  info->ImportName = tableImport->Field;
+  info->Flags = WASM_SYMBOL_UNDEFINED;
+  info->Flags |= WASM_SYMBOL_NO_STRIP;
+  info->ElementIndex = 0;
+  LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: " << info->Name
+                    << "\n");
+  const WasmGlobalType *globalType = nullptr;
+  const WasmEventType *eventType = nullptr;
+  const WasmSignature *signature = nullptr;
+  auto *wasmSym = make<WasmSymbol>(*info, globalType, &tableImport->Table,
+                                   eventType, signature);
+  Symbol *sym = createUndefined(*wasmSym, false);
+  // We're only sure it's a TableSymbol if the createUndefined succeeded.
+  if (errorCount())
+    return;
+  symbols.push_back(sym);
+  // Because there are no TABLE_NUMBER relocs, we can't compute accurate
+  // liveness info; instead, just mark the symbol as always live.
+  sym->markLive();
+
+  // We assume that this compilation unit has unrelocatable references to
+  // this table.
+  config->legacyFunctionTable = true;
 }
 
 void ObjFile::parse(bool ignoreComdats) {
@@ -487,11 +527,11 @@ void ObjFile::parse(bool ignoreComdats) {
 
   // Populate `Symbols` based on the symbols in the object.
   symbols.reserve(wasmObj->getNumberOfSymbols());
-  bool haveTableSymbol = false;
+  uint32_t tableSymbolCount = 0;
   for (const SymbolRef &sym : wasmObj->symbols()) {
     const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
     if (wasmSym.isTypeTable())
-      haveTableSymbol = true;
+      tableSymbolCount++;
     if (wasmSym.isDefined()) {
       // createDefined may fail if the symbol is comdat excluded in which case
       // we fall back to creating an undefined symbol
@@ -504,12 +544,7 @@ void ObjFile::parse(bool ignoreComdats) {
     symbols.push_back(createUndefined(wasmSym, isCalledDirectly[idx]));
   }
 
-  // As a stopgap measure while implementing table support, if the object file
-  // has table definitions or imports but no table symbols, synthesize symbols
-  // for those tables.  Mark as NO_STRIP to ensure they reach the output file,
-  // even if there are no TABLE_NUMBER relocs against them.
-  if (!haveTableSymbol)
-    synthesizeTableSymbols();
+  addLegacyIndirectFunctionTableIfNeeded(tableSymbolCount);
 }
 
 bool ObjFile::isExcludedByComdat(InputChunk *chunk) const {

diff  --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index 8a471389e7d1..8152420cb639 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -157,7 +157,7 @@ class ObjFile : public InputFile {
   Symbol *createUndefined(const WasmSymbol &sym, bool isCalledDirectly);
 
   bool isExcludedByComdat(InputChunk *chunk) const;
-  void synthesizeTableSymbols();
+  void addLegacyIndirectFunctionTableIfNeeded(uint32_t tableSymbolCount);
 
   std::unique_ptr<WasmObjectFile> wasmObj;
 };

diff  --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index 269357921589..9eed089ee0c1 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -366,6 +366,8 @@ uint32_t TableSymbol::getTableNumber() const {
 }
 
 void TableSymbol::setTableNumber(uint32_t number) {
+  if (const auto *t = dyn_cast<DefinedTable>(this))
+    return t->table->assignIndex(number);
   LLVM_DEBUG(dbgs() << "setTableNumber " << name << " -> " << number << "\n");
   assert(tableNumber == INVALID_INDEX);
   tableNumber = number;

diff  --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index ce4b1437f4fb..868d6d53c9f8 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -218,10 +218,34 @@ void TableSection::writeBody() {
 void TableSection::addTable(InputTable *table) {
   if (!table->live)
     return;
-  uint32_t tableNumber =
-      out.importSec->getNumImportedTables() + inputTables.size();
+  // Some inputs require that the indirect function table be assigned to table
+  // number 0.
+  if (config->legacyFunctionTable &&
+      isa<DefinedTable>(WasmSym::indirectFunctionTable) &&
+      cast<DefinedTable>(WasmSym::indirectFunctionTable)->table == table) {
+    if (out.importSec->getNumImportedTables()) {
+      // Alack!  Some other input imported a table, meaning that we are unable
+      // to assign table number 0 to the indirect function table.
+      for (const auto *culprit : out.importSec->importedSymbols) {
+        if (isa<UndefinedTable>(culprit)) {
+          error("object file not built with 'reference-types' feature "
+                "conflicts with import of table " +
+                culprit->getName() + "by file " + toString(culprit->getFile()));
+          return;
+        }
+      }
+      llvm_unreachable("failed to find conflicting table import");
+    }
+    inputTables.insert(inputTables.begin(), table);
+    return;
+  }
   inputTables.push_back(table);
-  table->assignIndex(tableNumber);
+}
+
+void TableSection::assignIndexes() {
+  uint32_t tableNumber = out.importSec->getNumImportedTables();
+  for (InputTable *t : inputTables)
+    t->assignIndex(tableNumber++);
 }
 
 void MemorySection::writeBody() {

diff  --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
index 5b753d441178..586fd7226c35 100644
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -151,6 +151,7 @@ class TableSection : public SyntheticSection {
   TableSection() : SyntheticSection(llvm::wasm::WASM_SEC_TABLE) {}
 
   bool isNeeded() const override { return inputTables.size() > 0; };
+  void assignIndexes() override;
   void writeBody() override;
   void addTable(InputTable *table);
 

diff  --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index d91b6b811081..1775096b1938 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -545,6 +545,15 @@ void Writer::populateTargetFeatures() {
 }
 
 static bool shouldImport(Symbol *sym) {
+  if (!sym->isUndefined())
+    return false;
+  if (sym->isWeak() && !config->relocatable)
+    return false;
+  if (!sym->isLive())
+    return false;
+  if (!sym->isUsedInRegularObj)
+    return false;
+
   // We don't generate imports for data symbols. They however can be imported
   // as GOT entries.
   if (isa<DataSymbol>(sym))
@@ -566,19 +575,20 @@ static bool shouldImport(Symbol *sym) {
 }
 
 void Writer::calculateImports() {
+  // Some inputs require that the indirect function table be assigned to table
+  // number 0, so if it is present and is an import, allocate it before any
+  // other tables.
+  if (WasmSym::indirectFunctionTable &&
+      shouldImport(WasmSym::indirectFunctionTable))
+    out.importSec->addImport(WasmSym::indirectFunctionTable);
+
   for (Symbol *sym : symtab->getSymbols()) {
-    if (!sym->isUndefined())
-      continue;
-    if (sym->isWeak() && !config->relocatable)
+    if (!shouldImport(sym))
       continue;
-    if (!sym->isLive())
+    if (sym == WasmSym::indirectFunctionTable)
       continue;
-    if (!sym->isUsedInRegularObj)
-      continue;
-    if (shouldImport(sym)) {
-      LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n");
-      out.importSec->addImport(sym);
-    }
+    LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n");
+    out.importSec->addImport(sym);
   }
 }
 
@@ -789,6 +799,7 @@ void Writer::assignIndexes() {
     out.tableSec->addTable(table);
 
   out.globalSec->assignIndexes();
+  out.tableSec->assignIndexes();
 }
 
 static StringRef getOutputDataSegmentName(StringRef name) {


        


More information about the llvm-commits mailing list