[llvm] r267317 - Add an internalization step to the ThinLTOCodeGenerator
Mehdi Amini via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 23 20:18:02 PDT 2016
Author: mehdi_amini
Date: Sat Apr 23 22:18:01 2016
New Revision: 267317
URL: http://llvm.org/viewvc/llvm-project?rev=267317&view=rev
Log:
Add an internalization step to the ThinLTOCodeGenerator
Keeping as much as possible internal/private is
known to help the optimizer. Let's try to benefit from
this in ThinLTO.
Note: this is early work, but is enough to build clang (and
all the LLVM tools). I still need to write some lit-tests...
Differential Revision: http://reviews.llvm.org/D19103
From: Mehdi Amini <mehdi.amini at apple.com>
Added:
llvm/trunk/test/ThinLTO/X86/internalize.ll
Modified:
llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h
llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp
llvm/trunk/tools/llvm-lto/llvm-lto.cpp
Modified: llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h?rev=267317&r1=267316&r2=267317&view=diff
==============================================================================
--- llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h (original)
+++ llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h Sat Apr 23 22:18:01 2016
@@ -201,6 +201,11 @@ public:
void crossModuleImport(Module &Module, ModuleSummaryIndex &Index);
/**
+ * Perform internalization.
+ */
+ void internalize(Module &Module, ModuleSummaryIndex &Index);
+
+ /**
* Perform post-importing ThinLTO optimizations.
*/
void optimize(Module &Module);
Modified: llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp?rev=267317&r1=267316&r2=267317&view=diff
==============================================================================
--- llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp (original)
+++ llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp Sat Apr 23 22:18:01 2016
@@ -17,6 +17,8 @@
#ifdef HAVE_LLVM_REVISION
#include "LLVMLTORevision.h"
#endif
+
+#include "UpdateCompilerUsed.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
@@ -32,6 +34,7 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CachePruning.h"
@@ -44,6 +47,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
@@ -309,6 +313,77 @@ static void optimizeModule(Module &TheMo
PM.run(TheModule);
}
+// Collect the GlobalValues of TheModule whose GUID is preserved or exported.
+static DenseSet<const GlobalValue *> computePreservedSymbolsForModule(
+ Module &TheModule, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
+ const FunctionImporter::ExportSetTy &ExportList) {
+ DenseSet<const GlobalValue *> PreservedGV;
+ if (GUIDPreservedSymbols.empty())
+ // Early exit: with nothing to preserve, return the empty set (internalization is then skipped).
+ return PreservedGV;
+
+ auto AddPreserveGV = [&](const GlobalValue &GV) {
+ auto GUID = GV.getGUID();
+ if (GUIDPreservedSymbols.count(GUID) || ExportList.count(GUID))
+ PreservedGV.insert(&GV);
+ };
+
+ for (auto &GV : TheModule)
+ AddPreserveGV(GV);
+ for (auto &GV : TheModule.globals())
+ AddPreserveGV(GV);
+ for (auto &GV : TheModule.aliases())
+ AddPreserveGV(GV);
+
+ return PreservedGV;
+}
+
+// Run internalization on \p TheModule, keeping the symbols in \p PreservedGV.
+static void
+doInternalizeModule(Module &TheModule, const TargetMachine &TM,
+ const DenseSet<const GlobalValue *> &PreservedGV) {
+ if (PreservedGV.empty()) {
+ // Be friendly and don't nuke the whole module when the client didn't
+ // supply anything to preserve.
+ return;
+ }
+
+ // Parse inline ASM and collect the list of symbols that are not defined in
+ // the current module.
+ StringSet<> AsmUndefinedRefs;
+ object::IRObjectFile::CollectAsmUndefinedRefs(
+ Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
+ [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ if (Flags & object::BasicSymbolRef::SF_Undefined)
+ AsmUndefinedRefs.insert(Name);
+ });
+
+ // Update the llvm.compiler_used globals to force preserving libcalls and
+ // symbols referenced from asm.
+ UpdateCompilerUsed(TheModule, TM, AsmUndefinedRefs);
+
+ // Declare a callback for the internalize pass that tells, for every
+ // candidate GlobalValue, whether it must be preserved.
+ auto MustPreserveGV =
+ [&](const GlobalValue &GV) -> bool { return PreservedGV.count(&GV); };
+
+ llvm::internalizeModule(TheModule, MustPreserveGV);
+}
+
+// Convert the preserved symbol names to GUIDs (MachO: drop one leading '_').
+static DenseSet<GlobalValue::GUID>
+computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
+ const Triple &TheTriple) {
+ DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
+ for (auto &Entry : PreservedSymbols) {
+ StringRef Name = Entry.first();
+ if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
+ Name = Name.drop_front();
+ GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
+ }
+ return GUIDPreservedSymbols;
+}
+
std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
TargetMachine &TM) {
SmallVector<char, 128> OutputBuffer;
@@ -395,6 +470,9 @@ public:
sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
}
+ // Access the path to this entry in the cache.
+ StringRef getEntryPath() { return EntryPath; }
+
// Try loading the buffer for this cache entry.
ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
if (EntryPath.empty())
@@ -429,6 +507,8 @@ static std::unique_ptr<MemoryBuffer> Pro
Module &TheModule, const ModuleSummaryIndex &Index,
StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen,
StringRef SaveTempsDir, unsigned count) {
@@ -436,6 +516,13 @@ static std::unique_ptr<MemoryBuffer> Pro
// Save temps: after IPO.
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc");
+ // Prepare for internalization by computing the set of symbols to preserve.
+ // We need to compute the list of symbols to preserve during internalization
+ // before doing any promotion because after renaming we won't (easily) match
+ // to the original name.
+ auto PreservedGV = computePreservedSymbolsForModule(
+ TheModule, GUIDPreservedSymbols, ExportList);
+
// "Benchmark"-like optimization: single-source case
bool SingleModule = (ModuleMap.size() == 1);
@@ -449,16 +536,24 @@ static std::unique_ptr<MemoryBuffer> Pro
// Save temps: after promotion.
saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc");
+ }
+ // Internalization
+ doInternalizeModule(TheModule, TM, PreservedGV);
+
+ // Save internalized bitcode
+ saveTempBitcode(TheModule, SaveTempsDir, count, ".3.internalized.bc");
+
+ if (!SingleModule) {
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
// Save temps: after cross-module import.
- saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
+ saveTempBitcode(TheModule, SaveTempsDir, count, ".4.imported.bc");
}
optimizeModule(TheModule, TM);
- saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc");
+ saveTempBitcode(TheModule, SaveTempsDir, count, ".5.opt.bc");
if (DisableCodeGen) {
// Configured to stop before CodeGen, serialize the bitcode and return.
@@ -516,7 +611,10 @@ void ThinLTOCodeGenerator::preserveSymbo
}
void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
- CrossReferencedSymbols.insert(Name);
+ // FIXME: At the moment, we don't take advantage of this extra information;
+ // we conservatively treat cross-referenced symbols as preserved.
+ // CrossReferencedSymbols.insert(Name);
+ PreservedSymbols.insert(Name);
}
// TargetMachine factory
@@ -620,10 +718,43 @@ void ThinLTOCodeGenerator::crossModuleIm
}
/**
+ * Internalize \p TheModule, keeping preserved and exported symbols.
+ */
+void ThinLTOCodeGenerator::internalize(Module &TheModule,
+ ModuleSummaryIndex &Index) {
+ initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
+ auto ModuleCount = Index.modulePaths().size();
+ auto ModuleIdentifier = TheModule.getModuleIdentifier();
+
+ // Convert the preserved symbols set from string to GUID.
+ auto GUIDPreservedSymbols =
+ computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+
+ // Collect for each module the global values it defines (GUID -> Summary).
+ StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
+ ModuleToDefinedGVSummaries(ModuleCount);
+ Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+
+ // Generate the import/export lists; only this module's exports are used.
+ StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
+ StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
+ ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
+ ExportLists);
+ auto &ExportList = ExportLists[ModuleIdentifier];
+
+ // Compute the symbols to preserve, then run the internalizer.
+ auto PreservedGV = computePreservedSymbolsForModule(
+ TheModule, GUIDPreservedSymbols, ExportList);
+ doInternalizeModule(TheModule, *TMBuilder.create(), PreservedGV);
+}
+
+/**
* Perform post-importing ThinLTO optimizations.
*/
void ThinLTOCodeGenerator::optimize(Module &TheModule) {
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
+
+ // Run the optimizer with a TargetMachine obtained from TMBuilder.
optimizeModule(TheModule, *TMBuilder.create());
}
@@ -694,10 +825,9 @@ void ThinLTOCodeGenerator::run() {
ExportLists);
// Convert the preserved symbols set from string to GUID, this is needed for
- // computing the caching.
- DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
- for (auto &Entry : PreservedSymbols)
- GUIDPreservedSymbols.insert(GlobalValue::getGUID(Entry.first()));
+ // computing the caching hash and the internalization.
+ auto GUIDPreservedSymbols =
+ computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
// Parallel optimizer + codegen
{
@@ -714,18 +844,21 @@ void ThinLTOCodeGenerator::run() {
// We use a std::map here to be able to have a defined ordering when
// producing a hash for the cache entry.
std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
- ResolveODR(*Index, ExportList, DefinedFunctions,
- ModuleIdentifier, ResolvedODR);
+ ResolveODR(*Index, ExportList, DefinedFunctions, ModuleIdentifier,
+ ResolvedODR);
// The module may be cached, this helps handling it.
- ModuleCacheEntry CacheEntry(
- CacheOptions.Path, *Index, ModuleBuffer.getBufferIdentifier(),
- ImportLists[ModuleBuffer.getBufferIdentifier()],
- ExportLists[ModuleBuffer.getBufferIdentifier()], ResolvedODR,
- DefinedFunctions, GUIDPreservedSymbols);
+ ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
+ ImportLists[ModuleIdentifier], ExportList,
+ ResolvedODR, DefinedFunctions,
+ GUIDPreservedSymbols);
{
auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
+ DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
+ << CacheEntry.getEntryPath() << "' for buffer " << count
+ << " " << ModuleIdentifier << "\n");
+
if (ErrOrBuffer) {
// Cache Hit!
ProducedBinaries[count] = std::move(ErrOrBuffer.get());
@@ -741,14 +874,14 @@ void ThinLTOCodeGenerator::run() {
auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
// Save temps: original file.
- if (!SaveTempsDir.empty()) {
- saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
- }
+ saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
auto &ImportList = ImportLists[ModuleIdentifier];
+ // Run the main process now, and generate a binary
auto OutputBuffer = ProcessThinLTOModule(
*TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
- ResolvedODR, CacheOptions, DisableCodeGen, SaveTempsDir, count);
+ ExportList, GUIDPreservedSymbols, ResolvedODR, CacheOptions,
+ DisableCodeGen, SaveTempsDir, count);
CacheEntry.write(*OutputBuffer);
ProducedBinaries[count] = std::move(OutputBuffer);
Added: llvm/trunk/test/ThinLTO/X86/internalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/ThinLTO/X86/internalize.ll?rev=267317&view=auto
==============================================================================
--- llvm/trunk/test/ThinLTO/X86/internalize.ll (added)
+++ llvm/trunk/test/ThinLTO/X86/internalize.ll Sat Apr 23 22:18:01 2016
@@ -0,0 +1,19 @@
+;; RUN: opt -module-summary %s -o %t1.bc
+; RUN: llvm-lto -thinlto-action=thinlink -o %t.index.bc %t1.bc
+; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=REGULAR
+; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - --exported-symbol=foo | llvm-dis -o - | FileCheck %s --check-prefix=INTERNALIZE
+
+; REGULAR: define void @foo
+; REGULAR: define void @bar
+; INTERNALIZE: define void @foo
+; INTERNALIZE: define internal void @bar
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @foo() {
+ ret void
+}
+define void @bar() {
+ ret void
+}
\ No newline at end of file
Modified: llvm/trunk/tools/llvm-lto/llvm-lto.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-lto/llvm-lto.cpp?rev=267317&r1=267316&r2=267317&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-lto/llvm-lto.cpp (original)
+++ llvm/trunk/tools/llvm-lto/llvm-lto.cpp Sat Apr 23 22:18:01 2016
@@ -68,6 +68,7 @@ enum ThinLTOModes {
THINLINK,
THINPROMOTE,
THINIMPORT,
+ THININTERNALIZE,
THINOPT,
THINCODEGEN,
THINALL
@@ -84,6 +85,9 @@ cl::opt<ThinLTOModes> ThinLTOMode(
clEnumValN(THINIMPORT, "import", "Perform both promotion and "
"cross-module importing (requires "
"-thinlto-index)."),
+ clEnumValN(THININTERNALIZE, "internalize",
+ "Perform internalization driven by -exported-symbol "
+ "(requires -thinlto-index)."),
clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."),
clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"),
clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"),
@@ -105,10 +109,10 @@ static cl::opt<std::string> OutputFilena
cl::desc("Override output filename"),
cl::value_desc("filename"));
-static cl::list<std::string>
- ExportedSymbols("exported-symbol",
- cl::desc("Symbol to export from the resulting object file"),
- cl::ZeroOrMore);
+static cl::list<std::string> ExportedSymbols(
+ "exported-symbol",
+ cl::desc("List of symbols to export from the resulting object file"),
+ cl::ZeroOrMore);
static cl::list<std::string>
DSOSymbols("dso-symbol",
@@ -329,6 +333,10 @@ public:
ThinLTOProcessing(const TargetOptions &Options) {
ThinGenerator.setCodePICModel(RelocModel);
ThinGenerator.setTargetOptions(Options);
+
+ // Add all the exported symbols to the table of symbols to preserve.
+ for (unsigned i = 0; i < ExportedSymbols.size(); ++i)
+ ThinGenerator.preserveSymbol(ExportedSymbols[i]);
}
void run() {
@@ -339,6 +347,8 @@ public:
return promote();
case THINIMPORT:
return import();
+ case THININTERNALIZE:
+ return internalize();
case THINOPT:
return optimize();
case THINCODEGEN:
@@ -430,6 +440,37 @@ private:
}
writeModuleToFile(*TheModule, OutputName);
}
+ }
+
+ void internalize() { // Handler for -thinlto-action=internalize.
+ if (InputFilenames.size() != 1 && !OutputFilename.empty())
+ report_fatal_error("Can't handle a single output filename and multiple "
+ "input files, do not provide an output filename and "
+ "the output files will be suffixed from the input "
+ "ones.");
+
+ if (ExportedSymbols.empty()) // Only a warning: processing continues below.
+ errs() << "Warning: -internalize will not perform without "
+ "-exported-symbol\n";
+
+ auto Index = loadCombinedIndex();
+ auto InputBuffers = loadAllFilesForIndex(*Index);
+ for (auto &MemBuffer : InputBuffers)
+ ThinGenerator.addModule(MemBuffer->getBufferIdentifier(),
+ MemBuffer->getBuffer());
+
+ for (auto &Filename : InputFilenames) {
+ LLVMContext Ctx;
+ auto TheModule = loadModule(Filename, Ctx);
+
+ ThinGenerator.internalize(*TheModule, *Index);
+
+ std::string OutputName = OutputFilename;
+ if (OutputName.empty()) {
+ OutputName = Filename + ".thinlto.internalized.bc";
+ }
+ writeModuleToFile(*TheModule, OutputName);
+ }
void optimize() {
More information about the llvm-commits
mailing list