[llvm] 45b7cf9 - [lld][WebAssembly] Enable string tail merging in debug sections
Sam Clegg via llvm-commits
llvm-commits at lists.llvm.org
Tue May 18 12:27:40 PDT 2021
Author: Sam Clegg
Date: 2021-05-18T12:25:39-07:00
New Revision: 45b7cf995551a0c0743e5d69f611bba7e4072ddf
URL: https://github.com/llvm/llvm-project/commit/45b7cf995551a0c0743e5d69f611bba7e4072ddf
DIFF: https://github.com/llvm/llvm-project/commit/45b7cf995551a0c0743e5d69f611bba7e4072ddf.diff
LOG: [lld][WebAssembly] Enable string tail merging in debug sections
This is a followup to https://reviews.llvm.org/D97657 which
applied string tail merging to data segments.
Fixes: https://bugs.llvm.org/show_bug.cgi?id=48828
Differential Revision: https://reviews.llvm.org/D102436
Added:
lld/test/wasm/Inputs/merge-string-debug2.s
lld/test/wasm/merge-string-debug.s
Modified:
lld/wasm/Driver.cpp
lld/wasm/InputChunks.cpp
lld/wasm/InputChunks.h
lld/wasm/InputFiles.cpp
lld/wasm/InputFiles.h
lld/wasm/OutputSections.cpp
lld/wasm/OutputSections.h
lld/wasm/OutputSegment.cpp
lld/wasm/Symbols.cpp
lld/wasm/Symbols.h
lld/wasm/Writer.cpp
llvm/include/llvm/MC/MCContext.h
llvm/lib/MC/MCObjectFileInfo.cpp
Removed:
################################################################################
diff --git a/lld/test/wasm/Inputs/merge-string-debug2.s b/lld/test/wasm/Inputs/merge-string-debug2.s
new file mode 100644
index 000000000000..1f4979056ac8
--- /dev/null
+++ b/lld/test/wasm/Inputs/merge-string-debug2.s
@@ -0,0 +1,4 @@
+.section .debug_str,"S",@
+ .asciz "clang version 13.0.0"
+ .asciz "bar"
+ .asciz "foo"
diff --git a/lld/test/wasm/merge-string-debug.s b/lld/test/wasm/merge-string-debug.s
new file mode 100644
index 000000000000..4a663501959f
--- /dev/null
+++ b/lld/test/wasm/merge-string-debug.s
@@ -0,0 +1,22 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/merge-string-debug2.s -o %t2.o
+
+# RUN: wasm-ld %t.o %t2.o -o %t.wasm --no-entry
+# RUN: llvm-readobj -x .debug_str %t.wasm | FileCheck %s --check-prefixes CHECK,CHECK-O1
+
+# RUN: wasm-ld -O0 %t.o %t2.o -o %tO0.wasm --no-entry
+# RUN: llvm-readobj -x .debug_str %tO0.wasm | FileCheck %s --check-prefixes CHECK,CHECK-O0
+
+.section .debug_str,"S",@
+ .asciz "clang version 13.0.0"
+ .asciz "foobar"
+
+# CHECK: Hex dump of section '.debug_str':
+
+# CHECK-O0: 0x00000000 636c616e 67207665 7273696f 6e203133 clang version 13
+# CHECK-O0: 0x00000010 2e302e30 00666f6f 62617200 636c616e .0.0.foobar.clan
+# CHECK-O0: 0x00000020 67207665 7273696f 6e203133 2e302e30 g version 13.0.0
+# CHECK-O0: 0x00000030 00626172 00666f6f 00 .bar.foo.
+
+# CHECK-O1: 0x00000000 666f6f62 61720066 6f6f0063 6c616e67 foobar.foo.clang
+# CHECK-O1: 0x00000010 20766572 73696f6e 2031332e 302e3000 version 13.0.0.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 6fec9d5b2278..9b01a84f7812 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -796,7 +796,7 @@ static void wrapSymbols(ArrayRef<WrappedSymbol> wrapped) {
}
static void splitSections() {
- // splitIntoPieces needs to be called on each MergeInputSection
+ // splitIntoPieces needs to be called on each MergeInputChunk
// before calling finalizeContents().
LLVM_DEBUG(llvm::dbgs() << "splitSections\n");
parallelForEach(symtab->objectFiles, [](ObjFile *file) {
@@ -804,6 +804,10 @@ static void splitSections() {
if (auto *s = dyn_cast<MergeInputChunk>(seg))
s->splitIntoPieces();
}
+ for (InputChunk *sec : file->customSections) {
+ if (auto *s = dyn_cast<MergeInputChunk>(sec))
+ s->splitIntoPieces();
+ }
});
}
diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp
index b7d1aa25e9c8..279cd41d1ff2 100644
--- a/lld/wasm/InputChunks.cpp
+++ b/lld/wasm/InputChunks.cpp
@@ -328,24 +328,24 @@ void InputFunction::writeCompressed(uint8_t *buf) const {
LLVM_DEBUG(dbgs() << " total: " << (buf + chunkSize - orig) << "\n");
}
-uint64_t InputChunk::getOffset(uint64_t offset) const {
- return outSecOff + offset;
-}
-
-uint64_t InputChunk::getSegmentOffset(uint64_t offset) const {
+uint64_t InputChunk::getChunkOffset(uint64_t offset) const {
if (const auto *ms = dyn_cast<MergeInputChunk>(this)) {
- LLVM_DEBUG(dbgs() << "getSegmentOffset(merged): " << getName() << "\n");
+ LLVM_DEBUG(dbgs() << "getChunkOffset(merged): " << getName() << "\n");
LLVM_DEBUG(dbgs() << "offset: " << offset << "\n");
LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset)
<< "\n");
assert(ms->parent);
- return ms->parent->getSegmentOffset(ms->getParentOffset(offset));
+ return ms->parent->getChunkOffset(ms->getParentOffset(offset));
}
return outputSegmentOffset + offset;
}
+uint64_t InputChunk::getOffset(uint64_t offset) const {
+ return outSecOff + getChunkOffset(offset);
+}
+
uint64_t InputChunk::getVA(uint64_t offset) const {
- return (outputSeg ? outputSeg->startVA : 0) + getSegmentOffset(offset);
+ return (outputSeg ? outputSeg->startVA : 0) + getChunkOffset(offset);
}
// Generate code to apply relocations to the data section at runtime.
diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h
index 47ed7ccaadcb..f1174d937ad3 100644
--- a/lld/wasm/InputChunks.h
+++ b/lld/wasm/InputChunks.h
@@ -43,7 +43,7 @@ class InputChunk {
MergedChunk,
Function,
SyntheticFunction,
- Section
+ Section,
};
StringRef name;
@@ -62,12 +62,15 @@ class InputChunk {
ArrayRef<WasmRelocation> getRelocations() const { return relocations; }
void setRelocations(ArrayRef<WasmRelocation> rs) { relocations = rs; }
- // Translate an offset in the input section to an offset in the output
+ // Translate an offset into the input chunk to an offset in the output
// section.
uint64_t getOffset(uint64_t offset) const;
- // For data segments, translate and offset into the input segment into
- // an offset into the output segment
- uint64_t getSegmentOffset(uint64_t offset) const;
+ // Translate an offset into the input chunk into an offset into the output
+ // chunk. For data segments (InputSegment) this will return an offset into
+ // the output segment. For MergeInputChunk, this will return an offset into
+ // the parent merged chunk. For other chunk types this is no-op and we just
+ // return unmodified offset.
+ uint64_t getChunkOffset(uint64_t offset) const;
uint64_t getVA(uint64_t offset = 0) const;
uint32_t getComdat() const { return comdat; }
@@ -132,22 +135,19 @@ class InputChunk {
// each global variable.
class InputSegment : public InputChunk {
public:
- InputSegment(const WasmSegment *seg, ObjFile *f)
- : InputChunk(f, InputChunk::DataSegment, seg->Data.Name,
- seg->Data.Alignment, seg->Data.LinkingFlags),
+ InputSegment(const WasmSegment &seg, ObjFile *f)
+ : InputChunk(f, InputChunk::DataSegment, seg.Data.Name,
+ seg.Data.Alignment, seg.Data.LinkingFlags),
segment(seg) {
- rawData = segment->Data.Content;
- comdat = segment->Data.Comdat;
- inputSectionOffset = segment->SectionOffset;
+ rawData = segment.Data.Content;
+ comdat = segment.Data.Comdat;
+ inputSectionOffset = segment.SectionOffset;
}
- InputSegment(StringRef name, uint32_t alignment, uint32_t flags)
- : InputChunk(nullptr, InputChunk::DataSegment, name, alignment, flags) {}
-
static bool classof(const InputChunk *c) { return c->kind() == DataSegment; }
protected:
- const WasmSegment *segment = nullptr;
+ const WasmSegment &segment;
};
class SyntheticMergedChunk;
@@ -174,12 +174,19 @@ static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
// This corresponds segments marked as WASM_SEG_FLAG_STRINGS.
class MergeInputChunk : public InputChunk {
public:
- MergeInputChunk(const WasmSegment *seg, ObjFile *f)
- : InputChunk(f, Merge, seg->Data.Name, seg->Data.Alignment,
- seg->Data.LinkingFlags) {
- rawData = seg->Data.Content;
- comdat = seg->Data.Comdat;
- inputSectionOffset = seg->SectionOffset;
+ MergeInputChunk(const WasmSegment &seg, ObjFile *f)
+ : InputChunk(f, Merge, seg.Data.Name, seg.Data.Alignment,
+ seg.Data.LinkingFlags) {
+ rawData = seg.Data.Content;
+ comdat = seg.Data.Comdat;
+ inputSectionOffset = seg.SectionOffset;
+ }
+
+ MergeInputChunk(const WasmSection &s, ObjFile *f)
+ : InputChunk(f, Merge, s.Name, 0, llvm::wasm::WASM_SEG_FLAG_STRINGS) {
+ assert(s.Type == llvm::wasm::WASM_SEC_CUSTOM);
+ comdat = s.Comdat;
+ rawData = s.Content;
}
static bool classof(const InputChunk *s) { return s->kind() == Merge; }
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index c65b05109c3d..7fd9215f1171 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -360,6 +360,17 @@ void ObjFile::addLegacyIndirectFunctionTableIfNeeded(
config->legacyFunctionTable = true;
}
+static bool shouldMerge(const WasmSection &sec) {
+ if (config->optimize == 0)
+ return false;
+ // Sadly we don't have section attributes yet for custom sections, so we
+ // currently go by the name alone.
+ // TODO(sbc): Add ability for wasm sections to carry flags so we don't
+ // need to use names here.
+ return sec.Name.startswith(".debug_str") ||
+ sec.Name.startswith(".debug_line_str");
+}
+
static bool shouldMerge(const WasmSegment &seg) {
// As of now we only support merging strings, and only with single byte
// alignment (2^0).
@@ -445,7 +456,11 @@ void ObjFile::parse(bool ignoreComdats) {
assert(!dataSection);
dataSection = &section;
} else if (section.Type == WASM_SEC_CUSTOM) {
- auto *customSec = make<InputSection>(section, this);
+ InputChunk *customSec;
+ if (shouldMerge(section))
+ customSec = make<MergeInputChunk>(section, this);
+ else
+ customSec = make<InputSection>(section, this);
customSec->discarded = isExcludedByComdat(customSec);
customSections.emplace_back(customSec);
customSections.back()->setRelocations(section.Relocations);
@@ -466,9 +481,9 @@ void ObjFile::parse(bool ignoreComdats) {
for (const WasmSegment &s : wasmObj->dataSegments()) {
InputChunk *seg;
if (shouldMerge(s)) {
- seg = make<MergeInputChunk>(&s, this);
+ seg = make<MergeInputChunk>(s, this);
} else
- seg = make<InputSegment>(&s, this);
+ seg = make<InputSegment>(s, this);
seg->discarded = isExcludedByComdat(seg);
segments.emplace_back(seg);
@@ -585,7 +600,7 @@ Symbol *ObjFile::createDefined(const WasmSymbol &sym) {
return symtab->addDefinedGlobal(name, flags, this, global);
}
case WASM_SYMBOL_TYPE_SECTION: {
- InputSection *section = customSectionsByIndex[sym.Info.ElementIndex];
+ InputChunk *section = customSectionsByIndex[sym.Info.ElementIndex];
assert(sym.isBindingLocal());
// Need to return null if discarded here? data and func only do that when
// binding is not local.
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index b720b889adcc..49337bd0eb33 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -141,8 +141,8 @@ class ObjFile : public InputFile {
std::vector<InputGlobal *> globals;
std::vector<InputEvent *> events;
std::vector<InputTable *> tables;
- std::vector<InputSection *> customSections;
- llvm::DenseMap<uint32_t, InputSection *> customSectionsByIndex;
+ std::vector<InputChunk *> customSections;
+ llvm::DenseMap<uint32_t, InputChunk *> customSectionsByIndex;
Symbol *getSymbol(uint32_t index) const { return symbols[index]; }
FunctionSymbol *getFunctionSymbol(uint32_t index) const;
diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp
index 780c8e4b2776..e39c3331641a 100644
--- a/lld/wasm/OutputSections.cpp
+++ b/lld/wasm/OutputSections.cpp
@@ -12,6 +12,7 @@
#include "OutputSegment.h"
#include "WriterUtils.h"
#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
@@ -234,13 +235,42 @@ bool DataSection::isNeeded() const {
return false;
}
+// Lots of duplication here with OutputSegment::finalizeInputSegments
+void CustomSection::finalizeInputSections() {
+ SyntheticMergedChunk *mergedSection = nullptr;
+ std::vector<InputChunk *> newSections;
+
+ for (InputChunk *s : inputSections) {
+ MergeInputChunk *ms = dyn_cast<MergeInputChunk>(s);
+ if (!ms) {
+ newSections.push_back(s);
+ continue;
+ }
+
+ if (!mergedSection) {
+ mergedSection =
+ make<SyntheticMergedChunk>(name, 0, WASM_SEG_FLAG_STRINGS);
+ newSections.push_back(mergedSection);
+ }
+ mergedSection->addMergeChunk(ms);
+ }
+
+ if (!mergedSection)
+ return;
+
+ mergedSection->finalizeContents();
+ inputSections = newSections;
+}
+
void CustomSection::finalizeContents() {
+ finalizeInputSections();
+
raw_string_ostream os(nameData);
encodeULEB128(name.size(), os);
os << name;
os.flush();
- for (InputSection *section : inputSections) {
+ for (InputChunk *section : inputSections) {
assert(!section->discarded);
section->outputSec = this;
section->outSecOff = payloadSize;
@@ -264,19 +294,19 @@ void CustomSection::writeTo(uint8_t *buf) {
buf += nameData.size();
// Write custom sections payload
- for (const InputSection *section : inputSections)
+ for (const InputChunk *section : inputSections)
section->writeTo(buf);
}
uint32_t CustomSection::getNumRelocations() const {
uint32_t count = 0;
- for (const InputSection *inputSect : inputSections)
+ for (const InputChunk *inputSect : inputSections)
count += inputSect->getNumRelocations();
return count;
}
void CustomSection::writeRelocations(raw_ostream &os) const {
- for (const InputSection *s : inputSections)
+ for (const InputChunk *s : inputSections)
s->writeRelocations(os);
}
diff --git a/lld/wasm/OutputSections.h b/lld/wasm/OutputSections.h
index 444116dac7d8..c3becf6ec240 100644
--- a/lld/wasm/OutputSections.h
+++ b/lld/wasm/OutputSections.h
@@ -111,7 +111,7 @@ class DataSection : public OutputSection {
// separately and are instead synthesized by the linker.
class CustomSection : public OutputSection {
public:
- CustomSection(std::string name, ArrayRef<InputSection *> inputSections)
+ CustomSection(std::string name, ArrayRef<InputChunk *> inputSections)
: OutputSection(llvm::wasm::WASM_SEC_CUSTOM, name),
inputSections(inputSections) {}
@@ -128,8 +128,9 @@ class CustomSection : public OutputSection {
void finalizeContents() override;
protected:
+ void finalizeInputSections();
size_t payloadSize = 0;
- ArrayRef<InputSection *> inputSections;
+ std::vector<InputChunk *> inputSections;
std::string nameData;
};
diff --git a/lld/wasm/OutputSegment.cpp b/lld/wasm/OutputSegment.cpp
index bf3e40c968a0..c09d5c30a0f6 100644
--- a/lld/wasm/OutputSegment.cpp
+++ b/lld/wasm/OutputSegment.cpp
@@ -55,16 +55,15 @@ void OutputSegment::finalizeInputSegments() {
return seg->flags == ms->flags && seg->alignment == ms->alignment;
});
if (i == mergedSegments.end()) {
- LLVM_DEBUG(llvm::dbgs() << "new merge section: " << name
+ LLVM_DEBUG(llvm::dbgs() << "new merge segment: " << name
<< " alignment=" << ms->alignment << "\n");
- SyntheticMergedChunk *syn =
- make<SyntheticMergedChunk>(name, ms->alignment, ms->flags);
+ auto *syn = make<SyntheticMergedChunk>(name, ms->alignment, ms->flags);
syn->outputSeg = this;
mergedSegments.push_back(syn);
i = std::prev(mergedSegments.end());
newSegments.push_back(syn);
} else {
- LLVM_DEBUG(llvm::dbgs() << "adding to merge section: " << name << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "adding to merge segment: " << name << "\n");
}
(*i)->addMergeChunk(ms);
}
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index 24b7e10dc559..6d960b4cf7e6 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -301,7 +301,7 @@ void DefinedData::setVA(uint64_t value_) {
uint64_t DefinedData::getOutputSegmentOffset() const {
LLVM_DEBUG(dbgs() << "getOutputSegmentOffset: " << getName() << "\n");
- return segment->getSegmentOffset(value);
+ return segment->getChunkOffset(value);
}
uint64_t DefinedData::getOutputSegmentIndex() const {
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index ff6eb0ae7ccb..0c557530236d 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -254,14 +254,14 @@ class OutputSectionSymbol : public Symbol {
class SectionSymbol : public Symbol {
public:
- SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr)
+ SectionSymbol(uint32_t flags, const InputChunk *s, InputFile *f = nullptr)
: Symbol("", SectionKind, flags, f), section(s) {}
static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
const OutputSectionSymbol *getOutputSectionSymbol() const;
- const InputSection *section;
+ const InputChunk *section;
};
class DataSymbol : public Symbol {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 055972560b81..1705c205eca4 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -100,7 +100,7 @@ class Writer {
uint64_t fileSize = 0;
std::vector<WasmInitEntry> initFunctions;
- llvm::StringMap<std::vector<InputSection *>> customSectionMapping;
+ llvm::StringMap<std::vector<InputChunk *>> customSectionMapping;
// Stable storage for command export wrapper function name strings.
std::list<std::string> commandExportWrapperNames;
@@ -121,7 +121,7 @@ void Writer::calculateCustomSections() {
log("calculateCustomSections");
bool stripDebug = config->stripDebug || config->stripAll;
for (ObjFile *file : symtab->objectFiles) {
- for (InputSection *section : file->customSections) {
+ for (InputChunk *section : file->customSections) {
// Exclude COMDAT sections that are not selected for inclusion
if (section->discarded)
continue;
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index c531aa851d1e..100befc48ecb 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -610,8 +610,9 @@ namespace llvm {
getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym,
unsigned UniqueID = GenericSectionID);
- MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K) {
- return getWasmSection(Section, K, 0, nullptr);
+ MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
+ unsigned Flags = 0) {
+ return getWasmSection(Section, K, Flags, nullptr);
}
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 92091ffcb746..1a448f040b3b 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
@@ -791,9 +792,10 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
DwarfLineSection =
Ctx->getWasmSection(".debug_line", SectionKind::getMetadata());
DwarfLineStrSection =
- Ctx->getWasmSection(".debug_line_str", SectionKind::getMetadata());
- DwarfStrSection =
- Ctx->getWasmSection(".debug_str", SectionKind::getMetadata());
+ Ctx->getWasmSection(".debug_line_str", SectionKind::getMetadata(),
+ wasm::WASM_SEG_FLAG_STRINGS);
+ DwarfStrSection = Ctx->getWasmSection(
+ ".debug_str", SectionKind::getMetadata(), wasm::WASM_SEG_FLAG_STRINGS);
DwarfLocSection =
Ctx->getWasmSection(".debug_loc", SectionKind::getMetadata());
DwarfAbbrevSection =
@@ -836,7 +838,8 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
DwarfAbbrevDWOSection =
Ctx->getWasmSection(".debug_abbrev.dwo", SectionKind::getMetadata());
DwarfStrDWOSection =
- Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata());
+ Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata(),
+ wasm::WASM_SEG_FLAG_STRINGS);
DwarfLineDWOSection =
Ctx->getWasmSection(".debug_line.dwo", SectionKind::getMetadata());
DwarfLocDWOSection =
More information about the llvm-commits
mailing list