[llvm] 1d445a6 - Reland: "[WebAssembly] Deduplicate imports of the same module name, field name, and type"

Sam Clegg via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 22 14:16:57 PDT 2021


Author: Nick Fitzgerald
Date: 2021-07-22T14:16:05-07:00
New Revision: 1d445a6e7679cc188fd051f7397b7d9ca8ce4f10

URL: https://github.com/llvm/llvm-project/commit/1d445a6e7679cc188fd051f7397b7d9ca8ce4f10
DIFF: https://github.com/llvm/llvm-project/commit/1d445a6e7679cc188fd051f7397b7d9ca8ce4f10.diff

LOG: Reland: "[WebAssembly] Deduplicate imports of the same module name, field name, and type"

When two symbols import the same thing, only one import should be
emitted in the Wasm file.

Fixes https://bugs.llvm.org/show_bug.cgi?id=50938

Reverted in: 16aac493e59519377071e900d119ba2e7e5b525d.

Reviewed By: sbc100

Differential Revision: https://reviews.llvm.org/D105519

Added: 
    lld/test/wasm/duplicate-function-imports.s
    lld/test/wasm/duplicate-global-imports.s
    lld/test/wasm/duplicate-table-imports.s

Modified: 
    lld/wasm/SyntheticSections.cpp
    lld/wasm/SyntheticSections.h
    llvm/include/llvm/BinaryFormat/Wasm.h
    llvm/include/llvm/BinaryFormat/WasmTraits.h

Removed: 
    


################################################################################
diff  --git a/lld/test/wasm/duplicate-function-imports.s b/lld/test/wasm/duplicate-function-imports.s
new file mode 100644
index 0000000000000..e2c942f624088
--- /dev/null
+++ b/lld/test/wasm/duplicate-function-imports.s
@@ -0,0 +1,57 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o
+# RUN: wasm-ld -o %t1.wasm %t.o
+# RUN: obj2yaml %t1.wasm | FileCheck %s
+
+.globl f1
+.import_module f1, env
+.import_name f1, f
+.functype f1 () -> (i32)
+
+# Same import module/name/type as `f1`, should be de-duped.
+.globl f2
+.import_module f2, env
+.import_name f2, f
+.functype f2 () -> (i32)
+
+# Same import module/name, but 
diff erent type. Should not be de-duped.
+.globl f3
+.import_module f3, env
+.import_name f3, f
+.functype f3 () -> (f32)
+
+.globl _start
+_start:
+  .functype _start () -> ()
+  call f1
+  drop
+  call f2
+  drop
+  call f3
+  drop
+  end_function
+
+
+# CHECK:        - Type:            TYPE
+# CHECK-NEXT:     Signatures:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         ParamTypes:      []
+# CHECK-NEXT:         ReturnTypes:
+# CHECK-NEXT:           - I32
+# CHECK-NEXT:       - Index:           1
+# CHECK-NEXT:         ParamTypes:      []
+# CHECK-NEXT:         ReturnTypes:
+# CHECK-NEXT:           - F32
+# CHECK-NEXT:       - Index:           2
+# CHECK-NEXT:         ParamTypes:      []
+# CHECK-NEXT:         ReturnTypes:     []
+# CHECK-NEXT:   - Type:            IMPORT
+# CHECK-NEXT:     Imports:
+# CHECK-NEXT:       - Module:          env
+# CHECK-NEXT:         Field:           f
+# CHECK-NEXT:         Kind:            FUNCTION
+# CHECK-NEXT:         SigIndex:        0
+# CHECK-NEXT:       - Module:          env
+# CHECK-NEXT:         Field:           f
+# CHECK-NEXT:         Kind:            FUNCTION
+# CHECK-NEXT:         SigIndex:        1
+# CHECK-NEXT:   - Type:

diff  --git a/lld/test/wasm/duplicate-global-imports.s b/lld/test/wasm/duplicate-global-imports.s
new file mode 100644
index 0000000000000..8f057cd4f19e1
--- /dev/null
+++ b/lld/test/wasm/duplicate-global-imports.s
@@ -0,0 +1,69 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o
+# RUN: wasm-ld --no-check-features -o %t1.wasm %t.o
+# RUN: obj2yaml %t1.wasm | FileCheck %s
+
+.global g1
+.import_module g1, env
+.import_name g1, g
+.globaltype g1, i64, immutable
+
+# Same import module/name/type as `g1`, should be de-duped.
+.global g2
+.import_module g2, env
+.import_name g2, g
+.globaltype g2, i64, immutable
+
+# Imported as an i32 instead of i64, so should not be de-duped.
+.global g3
+.import_module g3, env
+.import_name g3, g
+.globaltype g3, i32, immutable
+
+# Imported as mutable instead of immutable, so should not be de-duped.
+.global g4
+.import_module g4, env
+.import_name g4, g
+.globaltype g4, i64
+
+.globl _start
+_start:
+  .functype _start () -> ()
+  global.get g1
+  drop
+  global.get g2
+  drop
+  global.get g3
+  drop
+  global.get g4
+  drop
+  end_function
+
+
+# CHECK:        - Type:            IMPORT
+# CHECK-NEXT:     Imports:
+# CHECK-NEXT:      - Module:          env
+# CHECK-NEXT:        Field:           g
+# CHECK-NEXT:        Kind:            GLOBAL
+# CHECK-NEXT:        GlobalType:      I64
+# CHECK-NEXT:        GlobalMutable:   false
+# CHECK-NEXT:      - Module:          env
+# CHECK-NEXT:        Field:           g
+# CHECK-NEXT:        Kind:            GLOBAL
+# CHECK-NEXT:        GlobalType:      I32
+# CHECK-NEXT:        GlobalMutable:   false
+# CHECK-NEXT:      - Module:          env
+# CHECK-NEXT:        Field:           g
+# CHECK-NEXT:        Kind:            GLOBAL
+# CHECK-NEXT:        GlobalType:      I64
+# CHECK-NEXT:        GlobalMutable:   true
+# CHECK-NEXT:  - Type:
+
+# CHECK:         GlobalNames:
+# CHECK-NEXT:      - Index:           0
+# CHECK-NEXT:        Name:            g1
+# CHECK-NEXT:      - Index:           1
+# CHECK-NEXT:        Name:            g3
+# CHECK-NEXT:      - Index:           2
+# CHECK-NEXT:        Name:            g4
+# CHECK-NEXT:      - Index:           3
+# CHECK-NEXT:        Name:            __stack_pointer

diff  --git a/lld/test/wasm/duplicate-table-imports.s b/lld/test/wasm/duplicate-table-imports.s
new file mode 100644
index 0000000000000..aa3307e221824
--- /dev/null
+++ b/lld/test/wasm/duplicate-table-imports.s
@@ -0,0 +1,75 @@
+# RUN: llvm-mc -mattr=+reference-types -triple=wasm32-unknown-unknown -filetype=obj -o %t.o %s
+# RUN: wasm-ld --allow-undefined -o %t1.wasm %t.o
+# RUN: obj2yaml %t1.wasm | FileCheck %s
+
+.tabletype t1, funcref
+.import_module t1, env
+.import_name t1, t
+.globl t1
+
+# Same import module/name/type as `t1`, should be de-duped.
+.tabletype t2, funcref
+.import_module t2, env
+.import_name t2, t
+.globl t2
+
+# Imported as an externref instead of funcref, so should not be de-duped.
+.tabletype t3, externref
+.import_module t3, env
+.import_name t3, t
+.globl t3
+
+.globl _start
+_start:
+  .functype _start () -> ()
+
+  # Read from `t1`
+  i32.const 0
+  table.get t1
+  drop
+
+  # Read from `t2`
+  i32.const 0
+  table.get t2
+  drop
+
+  # Read from `t3`
+  i32.const 0
+  table.get t3
+  drop
+
+  end_function
+
+## XXX: the second imported table has index 1, not 0. I've verified by hand
+## (with `wasm2wat`) that the resulting Wasm file is correct: `t3` does end up
+## at index 1 and our `table.get` instructions are using the proper table index
+## immediates. This is also asserted (less legibly) in the hexdump of the code
+## body below. It looks like there's a bug in how `obj2yaml` disassembles
+## multiple table imports.
+
+# CHECK:        - Type:            IMPORT
+# CHECK-NEXT:     Imports:
+# CHECK-NEXT:       - Module:          env
+# CHECK-NEXT:         Field:           t
+# CHECK-NEXT:         Kind:            TABLE
+# CHECK-NEXT:         Table:
+# CHECK-NEXT:           Index:           0
+# CHECK-NEXT:           ElemType:        FUNCREF
+# CHECK-NEXT:           Limits:
+# CHECK-NEXT:             Minimum:         0x0
+# CHECK-NEXT:       - Module:          env
+# CHECK-NEXT:         Field:           t
+# CHECK-NEXT:         Kind:            TABLE
+# CHECK-NEXT:         Table:
+# CHECK-NEXT:           Index:           0
+# CHECK-NEXT:           ElemType:        EXTERNREF
+# CHECK-NEXT:           Limits:
+# CHECK-NEXT:             Minimum:         0x0
+# CHECK-NEXT:   - Type:
+
+# CHECK:        - Type:            CODE
+# CHECK-NEXT:     Functions:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         Locals:          []
+# CHECK-NEXT:         Body:            41002580808080001A41002580808080001A41002581808080001A0B
+# CHECK-NEXT:   - Type:

diff  --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index dcbf1302cc910..fc37115844b7e 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -109,15 +109,43 @@ void ImportSection::addGOTEntry(Symbol *sym) {
 
 void ImportSection::addImport(Symbol *sym) {
   assert(!isSealed);
-  importedSymbols.emplace_back(sym);
-  if (auto *f = dyn_cast<FunctionSymbol>(sym))
-    f->setFunctionIndex(numImportedFunctions++);
-  else if (auto *g = dyn_cast<GlobalSymbol>(sym))
-    g->setGlobalIndex(numImportedGlobals++);
-  else if (auto *t = dyn_cast<TagSymbol>(sym))
+  StringRef module = sym->importModule.getValueOr(defaultModule);
+  StringRef name = sym->importName.getValueOr(sym->getName());
+  if (auto *f = dyn_cast<FunctionSymbol>(sym)) {
+    ImportKey<WasmSignature> key(*(f->getSignature()), module, name);
+    auto entry = importedFunctions.try_emplace(key, numImportedFunctions);
+    if (entry.second) {
+      importedSymbols.emplace_back(sym);
+      f->setFunctionIndex(numImportedFunctions++);
+    } else {
+      f->setFunctionIndex(entry.first->second);
+    }
+  } else if (auto *g = dyn_cast<GlobalSymbol>(sym)) {
+    ImportKey<WasmGlobalType> key(*(g->getGlobalType()), module, name);
+    auto entry = importedGlobals.try_emplace(key, numImportedGlobals);
+    if (entry.second) {
+      importedSymbols.emplace_back(sym);
+      g->setGlobalIndex(numImportedGlobals++);
+    } else {
+      g->setGlobalIndex(entry.first->second);
+    }
+  } else if (auto *t = dyn_cast<TagSymbol>(sym)) {
+    // NB: There's currently only one possible kind of tag, and no
+    // `UndefinedTag`, so we don't bother de-duplicating tag imports.
+    importedSymbols.emplace_back(sym);
     t->setTagIndex(numImportedTags++);
-  else
-    cast<TableSymbol>(sym)->setTableNumber(numImportedTables++);
+  } else {
+    assert(TableSymbol::classof(sym));
+    auto *table = cast<TableSymbol>(sym);
+    ImportKey<WasmTableType> key(*(table->getTableType()), module, name);
+    auto entry = importedTables.try_emplace(key, numImportedTables);
+    if (entry.second) {
+      importedSymbols.emplace_back(sym);
+      table->setTableNumber(numImportedTables++);
+    } else {
+      table->setTableNumber(entry.first->second);
+    }
+  }
 }
 
 void ImportSection::writeBody() {

diff  --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
index f57fc8832dd28..8c678fa0853f3 100644
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -96,6 +96,68 @@ class TypeSection : public SyntheticSection {
   llvm::DenseMap<WasmSignature, int32_t> typeIndices;
 };
 
+/**
+ * A key for some kind of imported entity of type `T`.
+ *
+ * Used when de-duplicating imports.
+ */
+template <typename T> struct ImportKey {
+public:
+  enum class State { Plain, Empty, Tombstone };
+
+public:
+  T type;
+  llvm::Optional<StringRef> importModule;
+  llvm::Optional<StringRef> importName;
+  State state;
+
+public:
+  ImportKey(T type) : type(type), state(State::Plain) {}
+  ImportKey(T type, State state) : type(type), state(state) {}
+  ImportKey(T type, llvm::Optional<StringRef> importModule,
+            llvm::Optional<StringRef> importName)
+      : type(type), importModule(importModule), importName(importName),
+        state(State::Plain) {}
+};
+
+template <typename T>
+inline bool operator==(const ImportKey<T> &lhs, const ImportKey<T> &rhs) {
+  return lhs.state == rhs.state && lhs.importModule == rhs.importModule &&
+         lhs.importName == rhs.importName && lhs.type == rhs.type;
+}
+
+} // namespace wasm
+} // namespace lld
+
+// `ImportKey<T>` can be used as a key in a `DenseMap` if `T` can be used as a
+// key in a `DenseMap`.
+template <typename T> struct llvm::DenseMapInfo<lld::wasm::ImportKey<T>> {
+  static lld::wasm::ImportKey<T> getEmptyKey() {
+    typename lld::wasm::ImportKey<T> key(llvm::DenseMapInfo<T>::getEmptyKey());
+    key.state = lld::wasm::ImportKey<T>::State::Empty;
+    return key;
+  }
+  static lld::wasm::ImportKey<T> getTombstoneKey() {
+    typename lld::wasm::ImportKey<T> key(llvm::DenseMapInfo<T>::getEmptyKey());
+    key.state = lld::wasm::ImportKey<T>::State::Tombstone;
+    return key;
+  }
+  static unsigned getHashValue(const lld::wasm::ImportKey<T> &key) {
+    uintptr_t hash = hash_value(key.importModule);
+    hash = hash_combine(hash, key.importName);
+    hash = hash_combine(hash, llvm::DenseMapInfo<T>::getHashValue(key.type));
+    hash = hash_combine(hash, key.state);
+    return hash;
+  }
+  static bool isEqual(const lld::wasm::ImportKey<T> &lhs,
+                      const lld::wasm::ImportKey<T> &rhs) {
+    return lhs == rhs;
+  }
+};
+
+namespace lld {
+namespace wasm {
+
 class ImportSection : public SyntheticSection {
 public:
   ImportSection() : SyntheticSection(llvm::wasm::WASM_SEC_IMPORT) {}
@@ -131,6 +193,9 @@ class ImportSection : public SyntheticSection {
   unsigned numImportedFunctions = 0;
   unsigned numImportedTags = 0;
   unsigned numImportedTables = 0;
+  llvm::DenseMap<ImportKey<WasmGlobalType>, uint32_t> importedGlobals;
+  llvm::DenseMap<ImportKey<WasmSignature>, uint32_t> importedFunctions;
+  llvm::DenseMap<ImportKey<WasmTableType>, uint32_t> importedTables;
 };
 
 class FunctionSection : public SyntheticSection {

diff  --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index 61544c364a3ba..c38e64928521f 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -429,6 +429,16 @@ inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
   return !(LHS == RHS);
 }
 
+inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) {
+  return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum &&
+         (LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX ? LHS.Maximum == RHS.Maximum
+                                               : true);
+}
+
+inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) {
+  return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits;
+}
+
 std::string toString(WasmSymbolType type);
 std::string relocTypetoString(uint32_t type);
 bool relocTypeHasAddend(uint32_t type);

diff  --git a/llvm/include/llvm/BinaryFormat/WasmTraits.h b/llvm/include/llvm/BinaryFormat/WasmTraits.h
index e34182499187f..930ee690bcc05 100644
--- a/llvm/include/llvm/BinaryFormat/WasmTraits.h
+++ b/llvm/include/llvm/BinaryFormat/WasmTraits.h
@@ -63,6 +63,49 @@ template <> struct DenseMapInfo<wasm::WasmGlobalType> {
   }
 };
 
+// Traits for using WasmLimits in a DenseMap
+template <> struct DenseMapInfo<wasm::WasmLimits> {
+  static wasm::WasmLimits getEmptyKey() {
+    return wasm::WasmLimits{0xff, 0xff, 0xff};
+  }
+  static wasm::WasmLimits getTombstoneKey() {
+    return wasm::WasmLimits{0xee, 0xee, 0xee};
+  }
+  static unsigned getHashValue(const wasm::WasmLimits &Limits) {
+    unsigned Hash = hash_value(Limits.Flags);
+    Hash = hash_combine(Hash, Limits.Minimum);
+    if (Limits.Flags & llvm::wasm::WASM_LIMITS_FLAG_HAS_MAX) {
+      Hash = hash_combine(Hash, Limits.Maximum);
+    }
+    return Hash;
+  }
+  static bool isEqual(const wasm::WasmLimits &LHS,
+                      const wasm::WasmLimits &RHS) {
+    return LHS == RHS;
+  }
+};
+
+// Traits for using WasmTableType in a DenseMap
+template <> struct DenseMapInfo<wasm::WasmTableType> {
+  static wasm::WasmTableType getEmptyKey() {
+    return wasm::WasmTableType{0,
+                               DenseMapInfo<wasm::WasmLimits>::getEmptyKey()};
+  }
+  static wasm::WasmTableType getTombstoneKey() {
+    return wasm::WasmTableType{
+        1, DenseMapInfo<wasm::WasmLimits>::getTombstoneKey()};
+  }
+  static unsigned getHashValue(const wasm::WasmTableType &TableType) {
+    return hash_combine(
+        TableType.ElemType,
+        DenseMapInfo<wasm::WasmLimits>::getHashValue(TableType.Limits));
+  }
+  static bool isEqual(const wasm::WasmTableType &LHS,
+                      const wasm::WasmTableType &RHS) {
+    return LHS == RHS;
+  }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_BINARYFORMAT_WASMTRAITS_H


        


More information about the llvm-commits mailing list