[clang] [LinkerWrapper] Remove in-house handling of LTO (PR #113715)
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 25 10:25:22 PDT 2024
https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/113715
Summary:
This should be the linker's job: if the user creates any bitcode files,
then passing `-flto` to the linker for the toolchain should be able to
handle it. Right now this path is only used in the case where someone
does LTO w/ ld.gold targeting a CPU, so I think we are safe here as that
will still be forwarded; for bfd it'll be an error, as it would be on the
host. I think I talked the SYCL team out of using this as well, so I
should be good to delete it.
>From 2cdfef87c2922fa05ce487badc365b32815f1738 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 25 Oct 2024 12:23:15 -0500
Subject: [PATCH] [LinkerWrapper] Remove in-house handling of LTO
Summary:
This should be the linker's job: if the user creates any bitcode files,
then passing `-flto` to the linker for the toolchain should be able to
handle it. Right now this path is only used in the case where someone
does LTO w/ ld.gold targeting a CPU, so I think we are safe here as that
will still be forwarded; for bfd it'll be an error, as it would be on the
host. I think I talked the SYCL team out of using this as well, so I
should be good to delete it.
---
.../ClangLinkerWrapper.cpp | 345 +-----------------
1 file changed, 2 insertions(+), 343 deletions(-)
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 9fea1fdcd5fb46..804514d9c9777a 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -140,9 +140,6 @@ static std::list<SmallString<128>> TempFiles;
/// Codegen flags for LTO backend.
static codegen::RegisterCodeGenFlags CodeGenFlags;
-/// Global flag to indicate that the LTO pipeline threw an error.
-static std::atomic<bool> LTOError;
-
using OffloadingImage = OffloadBinary::OffloadingImage;
namespace llvm {
@@ -293,12 +290,10 @@ Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
return *Path;
}
-/// We will defer LTO to the target's linker if we are not doing JIT and it is
-/// supported by the toolchain.
bool linkerSupportsLTO(const ArgList &Args) {
llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
return Triple.isNVPTX() || Triple.isAMDGPU() ||
- Args.getLastArgValue(OPT_linker_path_EQ).ends_with("ld.lld");
+ Args.getLastArgValue(OPT_linker_path_EQ).ends_with("lld");
}
/// Returns the hashed value for a constant string.
@@ -653,7 +648,6 @@ void diagnosticHandler(const DiagnosticInfo &DI) {
switch (DI.getSeverity()) {
case DS_Error:
WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n";
- LTOError = true;
break;
case DS_Warning:
WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n";
@@ -667,334 +661,6 @@ void diagnosticHandler(const DiagnosticInfo &DI) {
}
}
-// Get the list of target features from the input file and unify them such that
-// if there are multiple +xxx or -xxx features we only keep the last one.
-std::vector<std::string> getTargetFeatures(ArrayRef<OffloadFile> InputFiles) {
- SmallVector<StringRef> Features;
- for (const OffloadFile &File : InputFiles) {
- for (auto Arg : llvm::split(File.getBinary()->getString("feature"), ","))
- Features.emplace_back(Arg);
- }
-
- // Only add a feature if it hasn't been seen before starting from the end.
- std::vector<std::string> UnifiedFeatures;
- DenseSet<StringRef> UsedFeatures;
- for (StringRef Feature : llvm::reverse(Features)) {
- if (UsedFeatures.insert(Feature.drop_front()).second)
- UnifiedFeatures.push_back(Feature.str());
- }
-
- return UnifiedFeatures;
-}
-
-template <typename ModuleHook = function_ref<bool(size_t, const Module &)>>
-std::unique_ptr<lto::LTO> createLTO(
- const ArgList &Args, const std::vector<std::string> &Features,
- ModuleHook Hook = [](size_t, const Module &) { return true; }) {
- const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
- // We need to remove AMD's target-id from the processor if present.
- StringRef TargetID = Args.getLastArgValue(OPT_arch_EQ);
- StringRef Arch = clang::getProcessorFromTargetID(Triple, TargetID);
- lto::Config Conf;
- lto::ThinBackend Backend;
- // TODO: Handle index-only thin-LTO
- Backend =
- lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
-
- Conf.CPU = Arch.str();
- Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple);
-
- Conf.RemarksFilename = RemarksFilename;
- Conf.RemarksPasses = RemarksPasses;
- Conf.RemarksWithHotness = RemarksWithHotness;
- Conf.RemarksHotnessThreshold = RemarksHotnessThreshold;
- Conf.RemarksFormat = RemarksFormat;
-
- StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
- Conf.MAttrs = Features;
- std::optional<CodeGenOptLevel> CGOptLevelOrNone =
- CodeGenOpt::parseLevel(OptLevel[1]);
- assert(CGOptLevelOrNone && "Invalid optimization level");
- Conf.CGOptLevel = *CGOptLevelOrNone;
- Conf.OptLevel = OptLevel[1] - '0';
- Conf.DefaultTriple = Triple.getTriple();
-
- // TODO: Should we complain about combining --opt-level and -passes, as opt
- // does? That might be too limiting in clang-linker-wrapper, so for now we
- // just warn in the help entry for -passes that the default<O?> corresponding
- // to --opt-level=O? should be included there. The problem is that
- // --opt-level produces effects in clang-linker-wrapper beyond what -passes
- // appears to be able to achieve, so rejecting the combination of --opt-level
- // and -passes would apparently make it impossible to combine those effects
- // with a custom pass pipeline.
- Conf.OptPipeline = PassPipeline;
- Conf.PassPlugins = PassPlugins;
-
- LTOError = false;
- Conf.DiagHandler = diagnosticHandler;
-
- Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
- Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
-
- if (SaveTemps) {
- std::string TempName = (sys::path::filename(ExecutableName) + "." +
- Triple.getTriple() + "." + TargetID)
- .str();
- Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) {
- std::string File =
- !Task ? TempName + ".postlink.bc"
- : TempName + "." + std::to_string(Task) + ".postlink.bc";
- error_code EC;
- raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
- if (EC)
- reportError(errorCodeToError(EC));
- WriteBitcodeToFile(M, LinkedBitcode);
- return true;
- };
- Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) {
- std::string File =
- !Task ? TempName + ".postopt.bc"
- : TempName + "." + std::to_string(Task) + ".postopt.bc";
- error_code EC;
- raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
- if (EC)
- reportError(errorCodeToError(EC));
- WriteBitcodeToFile(M, LinkedBitcode);
- return true;
- };
- }
- Conf.PostOptModuleHook = Hook;
- Conf.CGFileType = (Triple.isNVPTX() || SaveTemps)
- ? CodeGenFileType::AssemblyFile
- : CodeGenFileType::ObjectFile;
-
- // TODO: Handle remark files
- Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program);
-
- return std::make_unique<lto::LTO>(std::move(Conf), Backend);
-}
-
-// Returns true if \p S is valid as a C language identifier and will be given
-// `__start_` and `__stop_` symbols.
-bool isValidCIdentifier(StringRef S) {
- return !S.empty() && (isAlpha(S[0]) || S[0] == '_') &&
- llvm::all_of(llvm::drop_begin(S),
- [](char C) { return C == '_' || isAlnum(C); });
-}
-
-Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
- SmallVectorImpl<StringRef> &OutputFiles,
- const ArgList &Args) {
- llvm::TimeTraceScope TimeScope("Link bitcode files");
- const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
- StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
-
- SmallVector<OffloadFile, 4> BitcodeInputFiles;
- DenseSet<StringRef> StrongResolutions;
- DenseSet<StringRef> UsedInRegularObj;
- DenseSet<StringRef> UsedInSharedLib;
- BumpPtrAllocator Alloc;
- StringSaver Saver(Alloc);
-
- // Search for bitcode files in the input and create an LTO input file. If
- // it is not a bitcode file, scan its symbol table for symbols we need to
- // save.
- for (OffloadFile &File : InputFiles) {
- MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), "");
-
- file_magic Type = identify_magic(Buffer.getBuffer());
- switch (Type) {
- case file_magic::bitcode: {
- Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer);
- if (!IRSymtabOrErr)
- return IRSymtabOrErr.takeError();
-
- // Check for any strong resolutions we need to preserve.
- for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
- for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
- if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() &&
- !Sym.isUndefined())
- StrongResolutions.insert(Saver.save(Sym.Name));
- }
- }
- BitcodeInputFiles.emplace_back(std::move(File));
- continue;
- }
- case file_magic::elf_relocatable:
- case file_magic::elf_shared_object: {
- Expected<std::unique_ptr<ObjectFile>> ObjFile =
- ObjectFile::createObjectFile(Buffer);
- if (!ObjFile)
- continue;
-
- for (SymbolRef Sym : (*ObjFile)->symbols()) {
- Expected<StringRef> Name = Sym.getName();
- if (!Name)
- return Name.takeError();
-
- // Record if we've seen these symbols in any object or shared
- // libraries.
- if ((*ObjFile)->isRelocatableObject())
- UsedInRegularObj.insert(Saver.save(*Name));
- else
- UsedInSharedLib.insert(Saver.save(*Name));
- }
- continue;
- }
- default:
- continue;
- }
- }
-
- if (BitcodeInputFiles.empty())
- return Error::success();
-
- // Remove all the bitcode files that we moved from the original input.
- llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); });
-
- // LTO Module hook to output bitcode without running the backend.
- SmallVector<StringRef> BitcodeOutput;
- auto OutputBitcode = [&](size_t, const Module &M) {
- auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) +
- "-jit-" + Triple.getTriple(),
- "bc");
- if (!TempFileOrErr)
- reportError(TempFileOrErr.takeError());
-
- std::error_code EC;
- raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None);
- if (EC)
- reportError(errorCodeToError(EC));
- WriteBitcodeToFile(M, LinkedBitcode);
- BitcodeOutput.push_back(*TempFileOrErr);
- return false;
- };
-
- // We assume visibility of the whole program if every input file was
- // bitcode.
- auto Features = getTargetFeatures(BitcodeInputFiles);
- auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
- Args.hasArg(OPT_builtin_bitcode_EQ) ||
- Args.hasArg(OPT_clang_backend)
- ? createLTO(Args, Features, OutputBitcode)
- : createLTO(Args, Features);
-
- // We need to resolve the symbols so the LTO backend knows which symbols
- // need to be kept or can be internalized. This is a simplified symbol
- // resolution scheme to approximate the full resolution a linker would do.
- uint64_t Idx = 0;
- DenseSet<StringRef> PrevailingSymbols;
- for (auto &BitcodeInput : BitcodeInputFiles) {
- // Get a semi-unique buffer identifier for Thin-LTO.
- StringRef Identifier = Saver.save(
- std::to_string(Idx++) + "." +
- BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier());
- MemoryBufferRef Buffer =
- MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier);
- Expected<std::unique_ptr<lto::InputFile>> BitcodeFileOrErr =
- llvm::lto::InputFile::create(Buffer);
- if (!BitcodeFileOrErr)
- return BitcodeFileOrErr.takeError();
-
- // Save the input file and the buffer associated with its memory.
- const auto Symbols = (*BitcodeFileOrErr)->symbols();
- SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size());
- size_t Idx = 0;
- for (auto &Sym : Symbols) {
- lto::SymbolResolution &Res = Resolutions[Idx++];
-
- // We will use this as the prevailing symbol definition in LTO unless
- // it is undefined or another definition has already been used.
- Res.Prevailing =
- !Sym.isUndefined() &&
- !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) &&
- PrevailingSymbols.insert(Saver.save(Sym.getName())).second;
-
- // We need LTO to preseve the following global symbols:
- // 1) Symbols used in regular objects.
- // 2) Sections that will be given a __start/__stop symbol.
- // 3) Prevailing symbols that are needed visible to external
- // libraries.
- Res.VisibleToRegularObj =
- UsedInRegularObj.contains(Sym.getName()) ||
- isValidCIdentifier(Sym.getSectionName()) ||
- (Res.Prevailing &&
- (Sym.getVisibility() != GlobalValue::HiddenVisibility &&
- !Sym.canBeOmittedFromSymbolTable()));
-
- // Identify symbols that must be exported dynamically and can be
- // referenced by other files.
- Res.ExportDynamic =
- Sym.getVisibility() != GlobalValue::HiddenVisibility &&
- (UsedInSharedLib.contains(Sym.getName()) ||
- !Sym.canBeOmittedFromSymbolTable());
-
- // The final definition will reside in this linkage unit if the symbol
- // is defined and local to the module. This only checks for bitcode
- // files, full assertion will require complete symbol resolution.
- Res.FinalDefinitionInLinkageUnit =
- Sym.getVisibility() != GlobalValue::DefaultVisibility &&
- (!Sym.isUndefined() && !Sym.isCommon());
-
- // We do not support linker redefined symbols (e.g. --wrap) for device
- // image linking, so the symbols will not be changed after LTO.
- Res.LinkerRedefined = false;
- }
-
- // Add the bitcode file with its resolved symbols to the LTO job.
- if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions))
- return Err;
- }
-
- // Run the LTO job to compile the bitcode.
- size_t MaxTasks = LTOBackend->getMaxTasks();
- SmallVector<StringRef> Files(MaxTasks);
- auto AddStream =
- [&](size_t Task,
- const Twine &ModuleName) -> std::unique_ptr<CachedFileStream> {
- int FD = -1;
- auto &TempFile = Files[Task];
- StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o";
- std::string TaskStr = Task ? "." + std::to_string(Task) : "";
- auto TempFileOrErr =
- createOutputFile(sys::path::filename(ExecutableName) + "." +
- Triple.getTriple() + "." + Arch + TaskStr,
- Extension);
- if (!TempFileOrErr)
- reportError(TempFileOrErr.takeError());
- TempFile = *TempFileOrErr;
- if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD))
- reportError(errorCodeToError(EC));
- return std::make_unique<CachedFileStream>(
- std::make_unique<llvm::raw_fd_ostream>(FD, true));
- };
-
- if (Error Err = LTOBackend->run(AddStream))
- return Err;
-
- if (LTOError)
- return createStringError("Errors encountered inside the LTO pipeline.");
-
- // If we are embedding bitcode we only need the intermediate output.
- bool SingleOutput = Files.size() == 1;
- if (Args.hasArg(OPT_embed_bitcode)) {
- if (BitcodeOutput.size() != 1 || !SingleOutput)
- return createStringError("Cannot embed bitcode with multiple files.");
- OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front()));
- return Error::success();
- }
-
- // Append the new inputs to the device linker input. If the user requested
- // an internalizing link we need to pass the bitcode to clang.
- for (StringRef File :
- Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)
- ? BitcodeOutput
- : Files)
- OutputFiles.push_back(File);
-
- return Error::success();
-}
-
Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
const OffloadBinary &Binary = *File.getBinary();
@@ -1325,15 +991,8 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
if (File.getBinary()->getOffloadKind() != OFK_None)
ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind());
- // First link and remove all the input files containing bitcode if
- // the target linker does not support it natively.
+ // Write any remaining device inputs to an output file.
SmallVector<StringRef> InputFiles;
- if (!linkerSupportsLTO(LinkerArgs))
- if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs))
- return Err;
-
- // Write any remaining device inputs to an output file for the
- // linker.
for (const OffloadFile &File : Input) {
auto FileNameOrErr = writeOffloadFile(File);
if (!FileNameOrErr)
More information about the cfe-commits
mailing list