[lld] a96fe1b - [ELF][LTO] Call madvise(MADV_DONTNEED) on MemoryBuffer instances
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 30 11:37:01 PST 2021
Author: Fangrui Song
Date: 2021-12-30T11:36:58-08:00
New Revision: a96fe1bf3b320d62f1564ded5f7259eef1869cf9
URL: https://github.com/llvm/llvm-project/commit/a96fe1bf3b320d62f1564ded5f7259eef1869cf9
DIFF: https://github.com/llvm/llvm-project/commit/a96fe1bf3b320d62f1564ded5f7259eef1869cf9.diff
LOG: [ELF][LTO] Call madvise(MADV_DONTNEED) on MemoryBuffer instances
@tejohnson noticed that freeing MemoryBuffer instances right before
`lto->compile` can save RSS, likely because the memory can be reused by
LTO indexing (e.g. ThinLTO import/export lists).
For ELFFileBase instances, symbol and section names are backed by MemoryBuffer,
so destroying MemoryBuffer would make some infrequent passes (parseSymbolVersion,
reportBackrefs) crash and make debugging difficult.
For a BitcodeFile, its content is completely unused, but destroying its
MemoryBuffer makes the buffer identifier inaccessible and may introduce
constraints for future changes.
This patch leverages madvise(MADV_DONTNEED) which achieves the major gain
without the latent issues.
`Maximum resident set size (kbytes): ` for a large --thinlto-index-only link:
* current behavior: 10146104KiB
* destroy MemoryBuffer instances: 8555240KiB
* madvise(MADV_DONTNEED) just bitcodeFiles and lazyBitcodeFiles: 8737372KiB
* madvise(MADV_DONTNEED) all MemoryBuffers: 8739796KiB (16% decrease)
Depends on D116366
Reviewed By: tejohnson
Differential Revision: https://reviews.llvm.org/D116367
Added:
Modified:
lld/ELF/Driver.cpp
lld/ELF/Driver.h
lld/ELF/InputFiles.cpp
lld/ELF/InputFiles.h
Removed:
################################################################################
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index dab6a537a2eee..505602f7da626 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -87,6 +87,7 @@ bool elf::link(ArrayRef<const char *> args, bool canExitEarly,
inputSections.clear();
outputSections.clear();
+ memoryBuffers.clear();
archiveFiles.clear();
binaryFiles.clear();
bitcodeFiles.clear();
@@ -1987,6 +1988,28 @@ static Symbol *addUnusedUndefined(StringRef name,
return symtab->addSymbol(sym);
}
+static void markBuffersAsDontNeed(bool skipLinkedOutput) {
+ // With --thinlto-index-only, all buffers are nearly unused from now on
+ // (except symbol/section names used by infrequent passes). Mark input file
+ // buffers as MADV_DONTNEED so that these pages can be reused by the expensive
+ // thin link, saving memory.
+ if (skipLinkedOutput) {
+ for (MemoryBuffer &mb : llvm::make_pointee_range(memoryBuffers))
+ mb.dontNeedIfMmap();
+ return;
+ }
+
+ // Otherwise, just mark MemoryBuffers backing BitcodeFiles.
+ DenseSet<const char *> bufs;
+ for (BitcodeFile *file : bitcodeFiles)
+ bufs.insert(file->mb.getBufferStart());
+ for (BitcodeFile *file : lazyBitcodeFiles)
+ bufs.insert(file->mb.getBufferStart());
+ for (MemoryBuffer &mb : llvm::make_pointee_range(memoryBuffers))
+ if (bufs.count(mb.getBufferStart()))
+ mb.dontNeedIfMmap();
+}
+
// This function is where all the optimizations of link-time
// optimization takes place. When LTO is in use, some input files are
// not in native object file format but in the LLVM bitcode format.
@@ -1994,13 +2017,17 @@ static Symbol *addUnusedUndefined(StringRef name,
// using LLVM functions and replaces bitcode symbols with the results.
// Because all bitcode files that the program consists of are passed to
// the compiler at once, it can do a whole-program optimization.
-template <class ELFT> void LinkerDriver::compileBitcodeFiles() {
+template <class ELFT>
+void LinkerDriver::compileBitcodeFiles(bool skipLinkedOutput) {
llvm::TimeTraceScope timeScope("LTO");
// Compile bitcode files and replace bitcode symbols.
lto.reset(new BitcodeCompiler);
for (BitcodeFile *file : bitcodeFiles)
lto->add(*file);
+ if (!bitcodeFiles.empty())
+ markBuffersAsDontNeed(skipLinkedOutput);
+
for (InputFile *file : lto->compile()) {
auto *obj = cast<ObjFile<ELFT>>(file);
obj->parse(/*ignoreComdats=*/true);
@@ -2364,28 +2391,31 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
symtab->scanVersionScript();
}
+ // Skip the normal linked output if some LTO options are specified.
+ //
+ // For --thinlto-index-only, index file creation is performed in
+ // compileBitcodeFiles, so we are done afterwards. --plugin-opt=emit-llvm and
+ // --plugin-opt=emit-asm create output files in bitcode or assembly code,
+ // respectively. When only certain thinLTO modules are specified for
+ // compilation, the intermediate object file are the expected output.
+ const bool skipLinkedOutput = config->thinLTOIndexOnly || config->emitLLVM ||
+ config->ltoEmitAsm ||
+ !config->thinLTOModulesToCompile.empty();
+
// Do link-time optimization if given files are LLVM bitcode files.
// This compiles bitcode files into real object files.
//
// With this the symbol table should be complete. After this, no new names
// except a few linker-synthesized ones will be added to the symbol table.
- compileBitcodeFiles<ELFT>();
+ compileBitcodeFiles<ELFT>(skipLinkedOutput);
// Symbol resolution finished. Report backward reference problems.
reportBackrefs();
if (errorCount())
return;
- // If --thinlto-index-only is given, we should create only "index
- // files" and not object files. Index file creation is already done
- // in compileBitcodeFiles, so we are done if that's the case.
- // Likewise, --plugin-opt=emit-llvm and --plugin-opt=emit-asm are the
- // options to create output files in bitcode or assembly code
- // respectively. No object files are generated.
- // Also bail out here when only certain thinLTO modules are specified for
- // compilation. The intermediate object file are the expected output.
- if (config->thinLTOIndexOnly || config->emitLLVM || config->ltoEmitAsm ||
- !config->thinLTOModulesToCompile.empty())
+ // Bail out if normal linked output is skipped due to LTO.
+ if (skipLinkedOutput)
return;
// Handle --exclude-libs again because lto.tmp may reference additional
diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h
index 5961e1f694729..b8cbb3b19268f 100644
--- a/lld/ELF/Driver.h
+++ b/lld/ELF/Driver.h
@@ -34,7 +34,7 @@ class LinkerDriver {
void createFiles(llvm::opt::InputArgList &args);
void inferMachineType();
template <class ELFT> void link(llvm::opt::InputArgList &args);
- template <class ELFT> void compileBitcodeFiles();
+ template <class ELFT> void compileBitcodeFiles(bool skipLinkedOutput);
// True if we are in --whole-archive and --no-whole-archive.
bool inWholeArchive = false;
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index f1e29547de12c..dd7fd954bc51f 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -43,6 +43,7 @@ using namespace lld::elf;
bool InputFile::isInGroup;
uint32_t InputFile::nextGroupId;
+SmallVector<std::unique_ptr<MemoryBuffer>> elf::memoryBuffers;
SmallVector<ArchiveFile *, 0> elf::archiveFiles;
SmallVector<BinaryFile *, 0> elf::binaryFiles;
SmallVector<BitcodeFile *, 0> elf::bitcodeFiles;
@@ -122,9 +123,8 @@ Optional<MemoryBufferRef> elf::readFile(StringRef path) {
return None;
}
- std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
- MemoryBufferRef mbref = mb->getMemBufferRef();
- make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take MB ownership
+ MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef();
+ memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership
if (tar)
tar->append(relativeToRoot(path), mbref.getBuffer());
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 7bf5423bed44b..1084cf94313b5 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -407,6 +407,7 @@ inline bool isBitcode(MemoryBufferRef mb) {
std::string replaceThinLTOSuffix(StringRef path);
+extern SmallVector<std::unique_ptr<MemoryBuffer>> memoryBuffers;
extern SmallVector<ArchiveFile *, 0> archiveFiles;
extern SmallVector<BinaryFile *, 0> binaryFiles;
extern SmallVector<BitcodeFile *, 0> bitcodeFiles;
More information about the llvm-commits
mailing list