[lld] 4690bf2 - [lld][WebAssembly] Take advantage of extended const expressions when available
Sam Clegg via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 15 17:50:34 PDT 2022
Author: Sam Clegg
Date: 2022-03-15T17:50:05-07:00
New Revision: 4690bf2ed31b06e8403a1e7790e9739becabd4f6
URL: https://github.com/llvm/llvm-project/commit/4690bf2ed31b06e8403a1e7790e9739becabd4f6
DIFF: https://github.com/llvm/llvm-project/commit/4690bf2ed31b06e8403a1e7790e9739becabd4f6.diff
LOG: [lld][WebAssembly] Take advantage of extended const expressions when available
In particular we use these in two places:
1. When building PIC code we no longer need to combine output segments
into a single segment that can be initialized at `__memory_base`.
Instead each segment can encode its offset from `__memory_base` in
its initializer. e.g.
```
(i32.add (global.get __memory_base) (i32.const offset))
```
2. When building PIC code we no longer need to relocate internalized
global addresses. We can just initialize them with their correct
offsets.
Differential Revision: https://reviews.llvm.org/D121420
Added:
Modified:
lld/test/wasm/pie.ll
lld/test/wasm/tls-non-shared-memory.s
lld/wasm/Config.h
lld/wasm/OutputSections.cpp
lld/wasm/SyntheticSections.cpp
lld/wasm/SyntheticSections.h
lld/wasm/Writer.cpp
Removed:
################################################################################
diff --git a/lld/test/wasm/pie.ll b/lld/test/wasm/pie.ll
index 0ae1c9ed416cf..71c7d1fb5ce53 100644
--- a/lld/test/wasm/pie.ll
+++ b/lld/test/wasm/pie.ll
@@ -92,6 +92,51 @@ declare void @external_func()
; DISASSEM-NEXT: call 2
; DISASSEM-NEXT: end
+; Run the same test with extended-const support. When this is available
+; we don't need __wasm_apply_global_relocs and instead rely on the add
+; instruction in the InitExpr. We also, therefore, do not need these globals
+; to be mutable.
+
+; RUN: llc -relocation-model=pic -mattr=+extended-const,+mutable-globals,+atomics,+bulk-memory -filetype=obj %s -o %t.extended.o
+; RUN: wasm-ld --no-gc-sections --allow-undefined --experimental-pic -pie -o %t.extended.wasm %t.extended.o
+; RUN: obj2yaml %t.extended.wasm | FileCheck %s --check-prefix=EXTENDED-CONST
+
+; EXTENDED-CONST-NOT: __wasm_apply_global_relocs
+
+; EXTENDED-CONST: - Type: GLOBAL
+; EXTENDED-CONST-NEXT: Globals:
+; EXTENDED-CONST-NEXT: - Index: 4
+; EXTENDED-CONST-NEXT: Type: I32
+; EXTENDED-CONST-NEXT: Mutable: false
+; EXTENDED-CONST-NEXT: InitExpr:
+; EXTENDED-CONST-NEXT: Opcode: GLOBAL_GET
+; EXTENDED-CONST-NEXT: Index: 1
+; EXTENDED-CONST-NEXT: - Index: 5
+; EXTENDED-CONST-NEXT: Type: I32
+; EXTENDED-CONST-NEXT: Mutable: false
+; EXTENDED-CONST-NEXT: InitExpr:
+; EXTENDED-CONST-NEXT: Extended: true
+; EXTENDED-CONST-NEXT: Body: 230141046A0B
+; EXTENDED-CONST-NEXT: - Index: 6
+; EXTENDED-CONST-NEXT: Type: I32
+; EXTENDED-CONST-NEXT: Mutable: false
+; EXTENDED-CONST-NEXT: InitExpr:
+; EXTENDED-CONST-NEXT: Extended: true
+; This instruction sequence decodes to:
+; (global.get[0x23] 0x1 i32.const[0x41] 0x0C i32.add[0x6A] end[0x0b])
+; EXTENDED-CONST-NEXT: Body: 2301410C6A0B
+
+; EXTENDED-CONST: - Type: START
+; EXTENDED-CONST-NEXT: StartFunction: 2
+
+; EXTENDED-CONST: FunctionNames:
+; EXTENDED-CONST-NEXT: - Index: 0
+; EXTENDED-CONST-NEXT: Name: external_func
+; EXTENDED-CONST-NEXT: - Index: 1
+; EXTENDED-CONST-NEXT: Name: __wasm_call_ctors
+; EXTENDED-CONST-NEXT: - Index: 2
+; EXTENDED-CONST-NEXT: Name: __wasm_apply_data_relocs
+
; Run the same test with threading support. In this mode
; we expect __wasm_init_memory and __wasm_apply_data_relocs
; to be generated along with __wasm_start as the start
@@ -100,7 +145,7 @@ declare void @external_func()
; RUN: llc -relocation-model=pic -mattr=+mutable-globals,+atomics,+bulk-memory -filetype=obj %s -o %t.shmem.o
; RUN: wasm-ld --no-gc-sections --shared-memory --allow-undefined --experimental-pic -pie -o %t.shmem.wasm %t.shmem.o
; RUN: obj2yaml %t.shmem.wasm | FileCheck %s --check-prefix=SHMEM
-; RUN: llvm-objdump --disassemble-symbols=__wasm_start --no-show-raw-insn --no-leading-addr %t.shmem.wasm | FileCheck %s --check-prefixes DISASSEM-SHMEM
+; RUN: llvm-objdump --disassemble-symbols=__wasm_start --no-show-raw-insn --no-leading-addr %t.shmem.wasm | FileCheck %s --check-prefix DISASSEM-SHMEM
; SHMEM: - Type: START
; SHMEM-NEXT: StartFunction: 6
@@ -132,4 +177,3 @@ declare void @external_func()
; SHMEM-NEXT: Name: get_data_address
; SHMEM-NEXT: - Index: 9
; SHMEM-NEXT: Name: _start
-
diff --git a/lld/test/wasm/tls-non-shared-memory.s b/lld/test/wasm/tls-non-shared-memory.s
index 2c010049e1939..1754fd6254bb8 100644
--- a/lld/test/wasm/tls-non-shared-memory.s
+++ b/lld/test/wasm/tls-non-shared-memory.s
@@ -51,6 +51,9 @@ tls1:
# RUN: wasm-ld --experimental-pic --no-gc-sections --no-entry -pie -o %t-pie.wasm %t.o
# RUN: obj2yaml %t-pie.wasm | FileCheck %s --check-prefixes=PIE,PIC
+# RUN: wasm-ld --experimental-pic --features=atomics,bulk-memory,extended-const --no-gc-sections --no-entry -pie -o %t-extended-const.wasm %t.o
+# RUN: obj2yaml %t-extended-const.wasm | FileCheck %s --check-prefixes=EXT-CONST
+
# CHECK: - Type: GLOBAL
# __stack_pointer
# CHECK-NEXT: Globals:
@@ -136,3 +139,24 @@ tls1:
# PIC-NEXT: Index: {{\d*}}
# PIC-NEXT: Content: 2B0000002A000000
# PIC-NEXT: - Type: CUSTOM
+
+# Unless we have extended-const, in which case the merging is not needed.
+# The first segment is placed directly at `__memory_base` and the second
+# one is offset from `__memory_base` using `i32.add` and a constant.
+
+# EXT-CONST: - Type: DATA
+# EXT-CONST-NEXT: Segments:
+# EXT-CONST-NEXT: - SectionOffset: 6
+# EXT-CONST-NEXT: InitFlags: 0
+# EXT-CONST-NEXT: Offset:
+# EXT-CONST-NEXT: Opcode: GLOBAL_GET
+# EXT-CONST-NEXT: Index: 1
+# EXT-CONST-NEXT: Content: 2B000000
+# EXT-CONST-NEXT: - SectionOffset: 18
+# EXT-CONST-NEXT: InitFlags: 0
+# EXT-CONST-NEXT: Offset:
+# EXT-CONST-NEXT: Extended: true
+# This instruction sequence decodes to:
+# (global.get[0x23] 0x1 i32.const[0x41] 0x04 i32.add[0x6A] end[0x0b])
+# EXT-CONST-NEXT: Body: 230141046A0B
+# EXT-CONST-NEXT: Content: 2A000000
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 2cde65144ce66..44e595cbbd62e 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -35,6 +35,7 @@ struct Configuration {
bool exportAll;
bool exportDynamic;
bool exportTable;
+ bool extendedConst;
bool growableTable;
bool gcSections;
bool importMemory;
diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp
index a9c2db255b677..d23fcc35883cc 100644
--- a/lld/wasm/OutputSections.cpp
+++ b/lld/wasm/OutputSections.cpp
@@ -143,12 +143,14 @@ void DataSection::finalizeContents() {
});
#endif
- assert((config->sharedMemory || !config->isPic || activeCount <= 1) &&
+ assert((config->sharedMemory || !config->isPic || config->extendedConst ||
+ activeCount <= 1) &&
"output segments should have been combined by now");
writeUleb128(os, segmentCount, "data segment count");
os.flush();
bodySize = dataSectionHeader.size();
+ bool is64 = config->is64.getValueOr(false);
for (OutputSegment *segment : segments) {
if (!segment->requiredInBinary())
@@ -158,15 +160,27 @@ void DataSection::finalizeContents() {
if (segment->initFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX)
writeUleb128(os, 0, "memory index");
if ((segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
- WasmInitExpr initExpr;
- initExpr.Extended = false;
- if (config->isPic) {
- initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET;
- initExpr.Inst.Value.Global = WasmSym::memoryBase->getGlobalIndex();
+ if (config->isPic && config->extendedConst) {
+ writeU8(os, WASM_OPCODE_GLOBAL_GET, "global get");
+ writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(),
+ "literal (global index)");
+ if (segment->startVA) {
+ writePtrConst(os, segment->startVA, is64, "offset");
+ writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add");
+ }
+ writeU8(os, WASM_OPCODE_END, "opcode:end");
} else {
- initExpr = intConst(segment->startVA, config->is64.getValueOr(false));
+ WasmInitExpr initExpr;
+ initExpr.Extended = false;
+ if (config->isPic) {
+ assert(segment->startVA == 0);
+ initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET;
+ initExpr.Inst.Value.Global = WasmSym::memoryBase->getGlobalIndex();
+ } else {
+ initExpr = intConst(segment->startVA, is64);
+ }
+ writeInitExpr(os, initExpr);
}
- writeInitExpr(os, initExpr);
}
writeUleb128(os, segment->size, "segment size");
os.flush();
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index c47c58ce1961b..7bc9c274ff984 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -414,6 +414,7 @@ void GlobalSection::addInternalGOTEntry(Symbol *sym) {
}
void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const {
+ assert(!config->extendedConst);
bool is64 = config->is64.getValueOr(false);
unsigned opcode_ptr_const = is64 ? WASM_OPCODE_I64_CONST
: WASM_OPCODE_I32_CONST;
@@ -469,10 +470,10 @@ void GlobalSection::writeBody() {
for (const Symbol *sym : internalGotSymbols) {
bool mutable_ = false;
if (!sym->isStub) {
- // In the case of dynamic linking, these global must to be mutable since
- // they get updated to the correct runtime value during
- // `__wasm_apply_global_relocs`.
- if (config->isPic && !sym->isTLS())
+ // In the case of dynamic linking, unless we have 'extended-const'
+ // available, these globals must be mutable since they get updated to
+ // the correct runtime value during `__wasm_apply_global_relocs`.
+ if (!config->extendedConst && config->isPic && !sym->isTLS())
mutable_ = true;
// With multi-threading any TLS globals must be mutable since they get
// set during `__wasm_apply_global_tls_relocs`
@@ -480,17 +481,33 @@ void GlobalSection::writeBody() {
mutable_ = true;
}
WasmGlobalType type{itype, mutable_};
- WasmInitExpr initExpr;
- if (auto *d = dyn_cast<DefinedData>(sym))
- initExpr = intConst(d->getVA(), is64);
- else if (auto *f = dyn_cast<FunctionSymbol>(sym))
- initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64);
- else {
- assert(isa<UndefinedData>(sym));
- initExpr = intConst(0, is64);
- }
writeGlobalType(os, type);
- writeInitExpr(os, initExpr);
+
+ if (config->extendedConst && config->isPic && !sym->isTLS() &&
+ isa<DefinedData>(sym)) {
+ // We can use an extended init expression to add a constant
+ // offset of __memory_base.
+ auto *d = cast<DefinedData>(sym);
+ writeU8(os, WASM_OPCODE_GLOBAL_GET, "global get");
+ writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(),
+ "literal (global index)");
+ if (d->getVA()) {
+ writePtrConst(os, d->getVA(), is64, "offset");
+ writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add");
+ }
+ writeU8(os, WASM_OPCODE_END, "opcode:end");
+ } else {
+ WasmInitExpr initExpr;
+ if (auto *d = dyn_cast<DefinedData>(sym))
+ initExpr = intConst(d->getVA(), is64);
+ else if (auto *f = dyn_cast<FunctionSymbol>(sym))
+ initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64);
+ else {
+ assert(isa<UndefinedData>(sym));
+ initExpr = intConst(0, is64);
+ }
+ writeInitExpr(os, initExpr);
+ }
}
for (const DefinedData *sym : dataAddressGlobals) {
WasmGlobalType type{itype, false};
diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
index cb3b1888f1fa8..eac017883ef50 100644
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -288,6 +288,8 @@ class GlobalSection : public SyntheticSection {
// transform a `global.get` to an `i32.const`.
void addInternalGOTEntry(Symbol *sym);
bool needsRelocations() {
+ if (config->extendedConst)
+ return false;
return llvm::find_if(internalGotSymbols, [=](Symbol *sym) {
return !sym->isTLS();
}) != internalGotSymbols.end();
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 4465bb2e37b3d..95e9ab810efa5 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -450,7 +450,7 @@ void Writer::populateTargetFeatures() {
auto &explicitFeatures = config->features.getValue();
allowed.insert(explicitFeatures.begin(), explicitFeatures.end());
if (!config->checkFeatures)
- return;
+ goto done;
}
// Find the sets of used, required, and disallowed features
@@ -486,7 +486,7 @@ void Writer::populateTargetFeatures() {
allowed.insert(std::string(key));
if (!config->checkFeatures)
- return;
+ goto done;
if (config->sharedMemory) {
if (disallowed.count("shared-mem"))
@@ -537,12 +537,19 @@ void Writer::populateTargetFeatures() {
}
}
+done:
// Normally we don't include bss segments in the binary. In particular if
// memory is not being imported then we can assume it's zero-initialized.
// In the case the memory is imported, and we can use the memory.fill
// instruction, then we can also avoid including the segments.
if (config->importMemory && !allowed.count("bulk-memory"))
config->emitBssSegments = true;
+
+ if (allowed.count("extended-const"))
+ config->extendedConst = true;
+
+ for (auto &feature : allowed)
+ log("Allowed feature: " + feature);
}
void Writer::checkImportExportTargetFeatures() {
@@ -921,9 +928,9 @@ void Writer::combineOutputSegments() {
// With PIC code we currently only support a single active data segment since
// we only have a single __memory_base to use as our base address. This pass
// combines all data segments into a single .data segment.
- // This restructions can be relaxed once we have extended constant
- // expressions available:
- // https://github.com/WebAssembly/extended-const
+ // This restriction does not apply when the extended const extension is
+ // available: https://github.com/WebAssembly/extended-const
+ assert(!config->extendedConst);
assert(config->isPic && !config->sharedMemory);
if (segments.size() <= 1)
return;
@@ -1555,7 +1562,14 @@ void Writer::run() {
}
}
- if (config->isPic && !config->sharedMemory) {
+ log("-- populateTargetFeatures");
+ populateTargetFeatures();
+
+ // When outputting PIC code each segment lives at a fixed offset from the
+ // `__memory_base` import. Unless we support the extended const expression we
+ // can't do addition inside the constant expression, so we must combine the
+ // segments into a single one that can live at `__memory_base`.
+ if (config->isPic && !config->extendedConst && !config->sharedMemory) {
// In shared memory mode all data segments are passive and initialized
// via __wasm_init_memory.
log("-- combineOutputSegments");
@@ -1572,8 +1586,6 @@ void Writer::run() {
scanRelocations();
log("-- finalizeIndirectFunctionTable");
finalizeIndirectFunctionTable();
- log("-- populateTargetFeatures");
- populateTargetFeatures();
log("-- createSyntheticInitFunctions");
createSyntheticInitFunctions();
log("-- assignIndexes");
More information about the llvm-commits
mailing list