[llvm] Extend LLVM Offloading API for binary fatbin Bundles (PR #114833)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 11 10:01:31 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-binary-utilities
Author: David Salinas (david-salinas)
<details>
<summary>Changes</summary>
To provide a common API for offloading, this change extends the existing
LLVM Offloading API with support for Binary Fatbin Bundles, moving some of
that support over from the Clang offloading API. Subsequent commits are
intended to add functionality to LLVM tooling for Binary Fatbin Bundles.
Change-Id: I907fdcbcd0545162a0ce1cf17ebf7c9f3a4dbde6
---
Patch is 25.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114833.diff
2 Files Affected:
- (modified) llvm/include/llvm/Object/OffloadBinary.h (+153)
- (modified) llvm/lib/Object/OffloadBinary.cpp (+468)
``````````diff
diff --git a/llvm/include/llvm/Object/OffloadBinary.h b/llvm/include/llvm/Object/OffloadBinary.h
index c02aec8d956ed6..ee01cabe146517 100644
--- a/llvm/include/llvm/Object/OffloadBinary.h
+++ b/llvm/include/llvm/Object/OffloadBinary.h
@@ -21,6 +21,8 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
@@ -49,6 +51,31 @@ enum ImageKind : uint16_t {
IMG_LAST,
};
+/// Header layout constants and the compress/decompress entry points for a
+/// compressed offload bundle (magic "CCOB").
+class CompressedOffloadBundle {
+private:
+  // Widths, in bytes, of the individual header fields.
+  static constexpr size_t MagicSize = 4;
+  static constexpr size_t VersionFieldSize = sizeof(uint16_t);
+  static constexpr size_t MethodFieldSize = sizeof(uint16_t);
+  static constexpr size_t FileSizeFieldSize = sizeof(uint32_t);
+  static constexpr size_t UncompressedSizeFieldSize = sizeof(uint32_t);
+  static constexpr size_t HashFieldSize = sizeof(uint64_t);
+
+  // A version 1 header carries no total-file-size field; version 2 is the
+  // version 1 header plus that field.
+  static constexpr size_t V1HeaderSize = MagicSize + VersionFieldSize +
+                                         MethodFieldSize +
+                                         UncompressedSizeFieldSize +
+                                         HashFieldSize;
+  static constexpr size_t V2HeaderSize = V1HeaderSize + FileSizeFieldSize;
+
+  static constexpr llvm::StringRef MagicNumber = "CCOB";
+  static constexpr uint16_t Version = 2;
+
+public:
+  /// Compress \p Input using the parameters \p P.
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
+           bool Verbose = false);
+
+  /// Decompress \p Input.
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  decompress(llvm::MemoryBufferRef &Input, bool Verbose = false);
+};
+
/// A simple binary serialization of an offloading file. We use this format to
/// embed the offloading image into the host executable so it can be extracted
/// and used by the linker.
@@ -183,11 +210,137 @@ class OffloadFile : public OwningBinary<OffloadBinary> {
}
};
+/// One offload bundle located in a fat binary section, together with the
+/// table of entries (code objects) parsed from the bundle's header.
+class OffloadFatBinBundle {
+
+private:
+  // NOTE(review): Size is never assigned in the visible code, so getSize()
+  // currently always returns 0 -- confirm whether it should be populated.
+  uint64_t Size = 0u;
+  // Non-owning view of the name of the file the bundle was read from.
+  StringRef FileName;
+  // Entry count as read from the bundle header by ReadEntries().
+  int64_t NumberOfEntries = 0;
+
+public:
+  /// One record of the bundle's entry table.
+  struct BundleEntry {
+    uint64_t Offset = 0u;
+    uint64_t Size = 0u;
+    uint64_t IDLength = 0u;
+    // Non-owning view of the entry ID; the referenced bytes must outlive
+    // this entry.
+    StringRef ID;
+    BundleEntry(uint64_t O, uint64_t S, uint64_t I, StringRef T)
+        : Offset(O), Size(S), IDLength(I), ID(T) {}
+
+    /// Print every field of this entry to \p OS (no trailing newline).
+    void dump(raw_ostream &OS) const {
+      OS << "Offset = " << Offset << ", Size = " << Size
+         << ", ID Length = " << IDLength << ", ID = " << ID;
+    }
+
+    /// Print this entry to \p OS as
+    /// "<ID>\tfile://<filePath>#offset=<Offset>&size=<Size>\n".
+    void dumpURI(raw_ostream &OS, StringRef filePath) const {
+      // ID is a StringRef and need not be null-terminated, so it is streamed
+      // directly instead of through data().
+      OS << ID << "\tfile://" << filePath << "#offset=" << Offset
+         << "&size=" << Size << "\n";
+    }
+  };
+
+  uint64_t getSize() const { return Size; }
+  StringRef getFileName() const { return FileName; }
+  int64_t getNumEntries() const { return NumberOfEntries; }
+
+  /// The parsed entry table.
+  std::unique_ptr<SmallVector<BundleEntry>> Entries;
+
+  /// Create a bundle from the buffer and parse its entry table.
+  static Expected<std::unique_ptr<OffloadFatBinBundle>>
+  create(MemoryBufferRef, uint64_t SectionOffset, StringRef fileName);
+
+  /// Extract the entries of this bundle from \p Source.
+  Error extractBundle(const ObjectFile &Source);
+
+  /// Parse the entry table out of \p Section; \p SectionOffset is added to
+  /// each entry's offset.
+  Error ReadEntries(StringRef Section, uint64_t SectionOffset);
+
+  /// Dump every stored entry to outs().
+  void DumpEntries() {
+    for (const BundleEntry &Entry : *Entries)
+      Entry.dump(outs());
+  }
+
+  /// Print every stored entry to outs() in URI form.
+  void PrintEntriesAsURI() {
+    // Iterate over the entries actually stored rather than the header's
+    // entry count, so the walk can never run past the end of the table.
+    for (const BundleEntry &Entry : *Entries)
+      Entry.dumpURI(outs(), FileName);
+  }
+
+  /// \p Source is accepted for interface compatibility but is not used here.
+  OffloadFatBinBundle(MemoryBufferRef Source, StringRef file)
+      : FileName(file),
+        Entries(std::make_unique<SmallVector<BundleEntry>>()) {}
+};
+
+/// The kinds of URI an OffloadBundleURI can describe.
+enum uri_type_t { FILE_URI, MEMORY_URI };
+
+/// A parsed offload bundle URI of the form
+/// "file://<path>#offset=<offset>&size=<size>".
+struct OffloadBundleURI {
+  int64_t Offset = 0;
+  int64_t Size = 0;
+  uint64_t ProcessID = 0;
+  // Non-owning view into the string the URI was parsed from; that string
+  // must outlive this object.
+  StringRef FileName;
+  uri_type_t URIType;
+
+  // Constructors
+  // TODO: add a Copy ctor ?
+  OffloadBundleURI(StringRef file, int64_t off, int64_t size)
+      : Offset(off), Size(size), ProcessID(0), FileName(file),
+        URIType(FILE_URI) {}
+
+  /// Parse \p str as a URI of kind \p type. Malformed input terminates the
+  /// program through report_fatal_error.
+  OffloadBundleURI(StringRef str, uri_type_t type) : URIType(type) {
+    switch (URIType) {
+    case FILE_URI:
+      parseFileName(str);
+      break;
+    case MEMORY_URI:
+      parseMemoryURI(str);
+      break;
+    default:
+      report_fatal_error("Unrecognized URI type.");
+    }
+  }
+
+  /// Parse "file://<path>#offset=<offset>&size=<size>" into FileName, Offset
+  /// and Size. Any missing component, any number that does not parse as a
+  /// base-10 integer, or a negative offset or size is a fatal error.
+  void parseFileName(StringRef str) {
+    ProcessID = 0;
+    URIType = FILE_URI;
+
+    if (!str.consume_front("file://"))
+      report_fatal_error("Reading type of URI.");
+
+    // The path runs up to the first '#' or '?'.
+    StringRef FilePathname =
+        str.take_until([](char c) { return (c == '#') || (c == '?'); });
+    FileName = FilePathname;
+    str = str.drop_front(FilePathname.size());
+
+    if (!str.consume_front("#offset="))
+      report_fatal_error("Reading 'offset' in URI.");
+
+    // getAsInteger returns true on failure; a malformed number must not be
+    // silently accepted as an offset. Offset is signed, so a leading '-'
+    // parses successfully and has to be rejected separately.
+    StringRef OffsetStr = str.take_until([](char c) { return c == '&'; });
+    if (OffsetStr.getAsInteger(10, Offset) || Offset < 0)
+      report_fatal_error("Reading 'offset' in URI.");
+    str = str.drop_front(OffsetStr.size());
+
+    if (!str.consume_front("&size="))
+      report_fatal_error("Reading 'size' in URI.");
+    if (str.getAsInteger(10, Size) || Size < 0)
+      report_fatal_error("Reading 'size' in URI.");
+  }
+
+  void parseMemoryURI(StringRef str) {
+    // TODO: add parseMemoryURI type
+  }
+
+  StringRef getFileName() const { return FileName; }
+};
+
/// Extracts embedded device offloading code from a memory \p Buffer to a list
/// of \p Binaries.
Error extractOffloadBinaries(MemoryBufferRef Buffer,
SmallVectorImpl<OffloadFile> &Binaries);
+Error extractFatBinaryFromObject(const ObjectFile &Obj,
+ SmallVectorImpl<OffloadFatBinBundle> &Bundles);
+
+Error extractCodeObject(const ObjectFile &Source, int64_t Offset, int64_t Size,
+ StringRef OutputFileName);
+
+Error extractURI(StringRef URIstr);
+
/// Convert a string \p Name to an image kind.
ImageKind getImageKind(StringRef Name);
diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp
index 89dc12551494fd..5b30fcbdc19732 100644
--- a/llvm/lib/Object/OffloadBinary.cpp
+++ b/llvm/lib/Object/OffloadBinary.cpp
@@ -9,6 +9,7 @@
#include "llvm/Object/OffloadBinary.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
@@ -23,14 +24,20 @@
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Alignment.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/Timer.h"
using namespace llvm;
using namespace llvm::object;
namespace {
+// Timer group used by the timers created in this file. The enclosing
+// anonymous namespace already gives it internal linkage.
+llvm::TimerGroup
+    ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group",
+                                  "Timer group for clang offload bundler");
+
/// Attempts to extract all the embedded device images contained inside the
/// buffer \p Contents. The buffer is expected to contain a valid offloading
/// binary format.
@@ -99,6 +106,48 @@ Error extractFromObject(const ObjectFile &Obj,
return Error::success();
}
+// Extract every offload bundle (usually a Clang Offload Bundle) stored in a
+// fat_bin section.
+//
+// \p Contents is the section data, \p SectionOffset is added to the offsets
+// recorded for each bundle, and \p fileName is stored in every bundle
+// created. Each bundle found is appended to \p Bundles. Returns the error
+// from OffloadFatBinBundle::create if a bundle cannot be parsed.
+Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset,
+                           StringRef fileName,
+                           SmallVectorImpl<OffloadFatBinBundle> &Bundles) {
+  // Magic string that starts every bundle (24 characters).
+  const StringRef BundleMagic("__CLANG_OFFLOAD_BUNDLE__");
+
+  // Offset of the current bundle from the start of the section.
+  uint64_t Offset = 0;
+
+  // There could be multiple offloading bundles stored at this section.
+  while (true) {
+    // View of the section starting at the current bundle; no copy is made.
+    MemoryBufferRef Remaining(Contents.getBuffer().drop_front(Offset), "");
+
+    // Create the FatBinBundle object. This will also create the Bundle Entry
+    // list info.
+    auto FatBundleOrErr = OffloadFatBinBundle::create(
+        Remaining, SectionOffset + Offset, fileName);
+    if (!FatBundleOrErr)
+      return FatBundleOrErr.takeError();
+
+    // Add the current bundle to the list.
+    Bundles.emplace_back(std::move(**FatBundleOrErr));
+
+    // Find the next bundle by searching for the magic string, starting just
+    // past the magic of the current bundle.
+    const size_t NextBundleStart =
+        Remaining.getBuffer().find(BundleMagic, BundleMagic.size());
+    if (NextBundleStart == StringRef::npos)
+      return Error::success();
+
+    Offset += NextBundleStart;
+  }
+}
+
Error extractFromBitcode(MemoryBufferRef Buffer,
SmallVectorImpl<OffloadFile> &Binaries) {
LLVMContext Context;
@@ -170,6 +219,103 @@ Error extractFromArchive(const Archive &Library,
} // namespace
+/// Parse the bundle header in \p Buffer and append one BundleEntry per code
+/// object to Entries.
+///
+/// The header is read as little-endian: a 24-byte magic string, a 64-bit
+/// entry count, then for each entry a 64-bit offset, a 64-bit size, a 64-bit
+/// ID length and the ID bytes. \p SectionOffset is added to every entry
+/// offset before it is stored.
+///
+/// \returns object_error::parse_failed if the buffer ends before the header
+/// has been fully read.
+Error OffloadFatBinBundle::ReadEntries(StringRef Buffer,
+                                       uint64_t SectionOffset) {
+  BinaryStreamReader Reader(Buffer, llvm::endianness::little);
+
+  // Every stream failure is reported as parse_failed. The stream error itself
+  // has to be consumed first: destroying an unhandled failing llvm::Error
+  // aborts the program in builds with ABI-breaking checks enabled.
+  auto ParseFailed = [](Error E) {
+    consumeError(std::move(E));
+    return errorCodeToError(object_error::parse_failed);
+  };
+
+  // Read the Magic String first.
+  StringRef Magic;
+  if (Error E = Reader.readFixedString(Magic, 24))
+    return ParseFailed(std::move(E));
+
+  // Read the number of Code Objects (Entries) in the current Bundle.
+  uint64_t NumOfEntries = 0;
+  if (Error E = Reader.readInteger(NumOfEntries))
+    return ParseFailed(std::move(E));
+  NumberOfEntries = NumOfEntries;
+
+  // For each Bundle Entry (code object)
+  for (uint64_t I = 0; I < NumOfEntries; I++) {
+    uint64_t EntryOffset = 0;
+    uint64_t EntrySize = 0;
+    uint64_t EntryIDSize = 0;
+    StringRef EntryID;
+
+    if (Error E = Reader.readInteger(EntryOffset))
+      return ParseFailed(std::move(E));
+
+    if (Error E = Reader.readInteger(EntrySize))
+      return ParseFailed(std::move(E));
+
+    if (Error E = Reader.readInteger(EntryIDSize))
+      return ParseFailed(std::move(E));
+
+    if (Error E = Reader.readFixedString(EntryID, EntryIDSize))
+      return ParseFailed(std::move(E));
+
+    // Construct the entry in place; nothing is heap-allocated separately, so
+    // nothing can leak.
+    Entries->emplace_back(EntryOffset + SectionOffset, EntrySize, EntryIDSize,
+                          EntryID);
+  }
+
+  return Error::success();
+}
+
+/// Create an OffloadFatBinBundle from \p Buf and parse its entry table.
+///
+/// \p SectionOffset is forwarded to ReadEntries and \p fileName is stored in
+/// the bundle.
+///
+/// \returns object_error::parse_failed if \p Buf is shorter than 24 bytes,
+/// does not start with the offload bundle magic, or its entries cannot be
+/// read.
+Expected<std::unique_ptr<OffloadFatBinBundle>>
+OffloadFatBinBundle::create(MemoryBufferRef Buf, uint64_t SectionOffset,
+                            StringRef fileName) {
+  if (Buf.getBufferSize() < 24)
+    return errorCodeToError(object_error::parse_failed);
+
+  // Check for magic bytes.
+  if (identify_magic(Buf.getBuffer()) != file_magic::offload_bundle)
+    return errorCodeToError(object_error::parse_failed);
+
+  // Owned from the start so the bundle is released on the error path too.
+  auto TheBundle = std::make_unique<OffloadFatBinBundle>(Buf, fileName);
+
+  // Read the Bundle Entries
+  if (Error Err = TheBundle->ReadEntries(Buf.getBuffer(), SectionOffset)) {
+    // The failing Error must be handled before being discarded.
+    consumeError(std::move(Err));
+    return errorCodeToError(object_error::parse_failed);
+  }
+
+  return std::move(TheBundle);
+}
+
+/// Extract every non-empty entry of this bundle from \p Source into its own
+/// file named "<fileName>-offset<Offset>-size<Size>.co".
+///
+/// \returns the first error reported by extractCodeObject, if any.
+Error OffloadFatBinBundle::extractBundle(const ObjectFile &Source) {
+  // Walk the entries actually stored in the table rather than counting up to
+  // the header's entry count, so the walk cannot run past the end.
+  for (const BundleEntry &Entry : *Entries) {
+    // Entries with no payload produce no output file.
+    if (Entry.Size == 0)
+      continue;
+
+    // create output file name. Which should be
+    // <fileName>-offset<Offset>-size<Size>.co"
+    std::string str = getFileName().str() + "-offset" +
+                      itostr(Entry.Offset) + "-size" + itostr(Entry.Size) +
+                      ".co";
+    if (Error Err = object::extractCodeObject(Source, Entry.Offset,
+                                              Entry.Size, StringRef(str)))
+      return Err;
+  }
+
+  return Error::success();
+}
+
Expected<std::unique_ptr<OffloadBinary>>
OffloadBinary::create(MemoryBufferRef Buf) {
if (Buf.getBufferSize() < sizeof(Header) + sizeof(Entry))
@@ -299,6 +445,108 @@ Error object::extractOffloadBinaries(MemoryBufferRef Buffer,
}
}
+/// Scan every section of \p Obj and append each offload bundle found to
+/// \p Bundles. Sections whose contents start with the compressed offload
+/// bundle magic are decompressed first.
+///
+/// \p Obj must be an ELF or COFF object (asserted).
+///
+/// \returns an error if a section cannot be read, decompression fails, or a
+/// bundle cannot be parsed.
+Error object::extractFatBinaryFromObject(
+    const ObjectFile &Obj, SmallVectorImpl<OffloadFatBinBundle> &Bundles) {
+  assert((Obj.isELF() || Obj.isCOFF()) && "Invalid file type");
+
+  // Iterate through the sections looking for offload bundle sections.
+  for (SectionRef Sec : Obj.sections()) {
+    Expected<StringRef> Buffer = Sec.getContents();
+    if (!Buffer)
+      return Buffer.takeError();
+
+    // If the section does not start with an offload bundle magic (plain or
+    // compressed), just skip it.
+    const file_magic Magic = identify_magic(*Buffer);
+    const bool IsCompressed = Magic == file_magic::offload_bundle_compressed;
+    if (Magic != file_magic::offload_bundle && !IsCompressed)
+      continue;
+
+    // Offset of the section's data within the object file.
+    uint64_t SectionOffset = 0;
+    if (Obj.isELF()) {
+      SectionOffset = ELFSectionRef(Sec).getOffset();
+    } else if (const auto *COFFObj = dyn_cast<COFFObjectFile>(&Obj)) {
+      // PointerToRawData is the file offset of the section's raw data.
+      // NOTE(review): confirm against a COFF test input.
+      SectionOffset = COFFObj->getCOFFSection(Sec)->PointerToRawData;
+    }
+
+    MemoryBufferRef Contents(*Buffer, Obj.getFileName());
+
+    if (!IsCompressed) {
+      if (Error Err = extractOffloadBundle(Contents, SectionOffset,
+                                           Obj.getFileName(), Bundles))
+        return Err;
+      continue;
+    }
+
+    // Decompress the input before parsing it.
+    Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+        CompressedOffloadBundle::decompress(Contents, false);
+
+    if (!DecompressedBufferOrErr)
+      return createStringError(
+          inconvertibleErrorCode(),
+          "Failed to decompress input: " +
+              llvm::toString(DecompressedBufferOrErr.takeError()));
+
+    // NOTE(review): the entries parsed below appear to keep StringRef IDs
+    // that point into this decompressed buffer, which is destroyed at the end
+    // of this iteration, and their offsets are relative to the decompressed
+    // data rather than the file -- verify before relying on them.
+    MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
+    if (Error Err = extractOffloadBundle(DecompressedInput, SectionOffset,
+                                         Obj.getFileName(), Bundles))
+      return Err;
+  }
+  return Error::success();
+}
+
+/// Copy \p Size bytes starting at byte \p Offset of \p Source's underlying
+/// buffer into a new file named \p OutputFileName.
+///
+/// \returns an error if the requested range does not lie inside the input
+/// buffer, or if the output file cannot be created or committed.
+Error object::extractCodeObject(const ObjectFile &Source, int64_t Offset,
+                                int64_t Size, StringRef OutputFileName) {
+  Expected<MemoryBufferRef> InputBuffOrErr = Source.getMemoryBufferRef();
+  if (Error Err = InputBuffOrErr.takeError())
+    return Err;
+
+  // Validate the range before touching the file system: Offset and Size may
+  // come from untrusted input (a bundle header or a user-supplied URI).
+  const uint64_t InputSize = InputBuffOrErr->getBufferSize();
+  if (Offset < 0 || Size < 0 || static_cast<uint64_t>(Offset) > InputSize ||
+      static_cast<uint64_t>(Size) > InputSize - static_cast<uint64_t>(Offset))
+    return createStringError(inconvertibleErrorCode(),
+                             "Code object range is outside of the input file");
+
+  Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+      FileOutputBuffer::create(OutputFileName, Size);
+  if (!BufferOrErr)
+    return BufferOrErr.takeError();
+
+  std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+  const char *const Begin = InputBuffOrErr->getBufferStart() + Offset;
+  std::copy(Begin, Begin + Size, Buf->getBufferStart());
+
+  return Buf->commit();
+}
+
+// Given a URI of the form "file://<path>#offset=<Offset>&size=<Size>",
+// extract that byte range of the file into a code object file named
+// <SourceFile>-offset<Offset>-size<Size>.co
+//
+// A malformed URI is a fatal error (raised while parsing the URI). Returns an
+// error if the source file cannot be opened as an object file or the range
+// cannot be extracted.
+Error object::extractURI(StringRef URIstr) {
+  // Parse the URI into a local object; nothing is heap-allocated, so nothing
+  // needs to be released on any return path.
+  const object::OffloadBundleURI URI(URIstr, FILE_URI);
+
+  std::string OutputFile = URI.FileName.str();
+  OutputFile +=
+      "-offset" + itostr(URI.Offset) + "-size" + itostr(URI.Size) + ".co";
+
+  // Create an ObjectFile object from the file named in the URI.
+  auto ObjOrErr = ObjectFile::createObjectFile(URI.FileName);
+  if (!ObjOrErr)
+    return ObjOrErr.takeError();
+
+  auto Obj = ObjOrErr->getBinary();
+  return object::extractCodeObject(*Obj, URI.Offset, URI.Size, OutputFile);
+}
+
OffloadKind object::getOffloadKind(StringRef Name) {
return llvm::StringSwitch<OffloadKind>(Name)
.Case("openmp", OFK_OpenMP)
@@ -382,3 +630,223 @@ bool object::areTargetsCompatible(const OffloadFile::TargetID &LHS,
return false;
return true;
}
+
+// Render Value in decimal with a comma between each group of three digits,
+// counted from the right (e.g. 1234567 -> "1,234,567").
+static std::string formatWithCommas(unsigned long long Value) {
+  const std::string Digits = std::to_string(Value);
+
+  // Number of digits in the leading (possibly short) group.
+  const size_t Lead = Digits.size() % 3;
+
+  std::string Grouped;
+  for (size_t Idx = 0; Idx < Digits.size(); ++Idx) {
+    // A separator goes in front of every digit that starts a new group of
+    // three, except in front of the very first digit.
+    if (Idx != 0 && Idx % 3 == Lead)
+      Grouped += ',';
+    Grouped += Digits[Idx];
+  }
+  return Grouped;
+}
+
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+CompressedOffloadBundle::decompress(llvm::MemoryBufferRef &Input,
+
+ bool Verbose) {
+ StringRef Blob = Input.getBuffer();
+
+ if (Blob.size() < V1HeaderSize)
+ return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+
+ if (llvm::identify_magic(Blob) !=
+ llvm::file_magic::offload_bundle_compressed) {
+ if (Verbose)
+ llvm::errs() << "Uncompressed bundle.\n";
+ return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+ }
+
+ size_t CurrentOffset = MagicSize;
+
+ uint16_t ThisVersion;
+ memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
+ CurrentOffset += VersionFieldSize;
+
+ uint16_t CompressionMethod;
+ memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
+ CurrentOffset += MethodFieldSize;
+
+ uint32_t TotalFileSize;
+ if (ThisVersion >= 2) {
+ if (Blob.size() < V2HeaderSize)
+ return createStringError(inconvertibleErrorCode(),
+ "Compressed bundle header size too small");
+ memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
+ CurrentOffset += FileSizeFieldSize;
+ }
+
+ uint32_t UncompressedSize;
+ memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
+ CurrentOffset += UncompressedSizeFieldSize;
+
+ uint64_t StoredHash;
+ memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
+ CurrentOffset += HashFieldSize;
+
+ llvm::compression::Format CompressionFormat;
+ if (CompressionMethod ==
+ static_cast<uint16_t>(llvm::compression::Format::Zlib))
+ CompressionFormat = llvm::compression::Format::Zlib;
+ else if (CompressionMethod ==
+ static_cast<uint16_t>(llvm::compression::Format::Zstd))
+ CompressionFormat = llvm::compression::Format::Zstd;
+ else
+ return createStringError(inconvertibleErrorCode(),
+ "Unknown compressing method");
+
+ llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
+ ClangOffloadBundlerTimerGroup);
+ if (Verbose)
+ DecompressTimer.startTimer();
+
+ SmallVector<uint8_t, 0> DecompressedData;
+ StringRef CompressedData = Blob.substr(CurrentOffset);
+ if (llvm::Error DecompressionError = llvm::compression::decompress(
+ CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
+ DecompressedData, UncompressedSize))
+ return createStringError(inconvertibleErrorCode(),
+ "Could not decompress embedded file contents: " +
+ llvm::toString(std::move(DecompressionError)));
+
+ if (Verbose) {
+ DecompressTimer.stopTimer();
+
+ double DecompressionTimeSeconds =
+ DecompressTimer.getTotalTime().getWallTime();
+
+ // Recalculate MD5 hash for integrity check
+ llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
+ "Hash recalculation time",
+ ClangOffloadBundlerTimerGroup);
+ HashRecalcTimer.startTimer();
+ llvm::MD5 Hash;
+ llvm::MD5::MD5Result Result;
+ Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
+ DecompressedData.size()));
+ Hash.final(Result);
+ uint64_t RecalculatedHash = Result.low();
+ HashRecalcTimer.stopTimer();
+ bool HashMatch = (StoredHash == Recalcu...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/114833
More information about the llvm-commits
mailing list