[llvm] 67ff66e - [PGO][Offload] Fix offload coverage mapping (#143490)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 18:19:42 PDT 2025
Author: Ethan Luis McDonough
Date: 2025-06-10T20:19:38-05:00
New Revision: 67ff66e67734c0b283ec676899e5b89b67fdafcb
URL: https://github.com/llvm/llvm-project/commit/67ff66e67734c0b283ec676899e5b89b67fdafcb
DIFF: https://github.com/llvm/llvm-project/commit/67ff66e67734c0b283ec676899e5b89b67fdafcb.diff
LOG: [PGO][Offload] Fix offload coverage mapping (#143490)
This pull request fixes coverage mapping on GPU targets.
- It adds an address space cast to the coverage mapping generation pass.
- It reads the profiled function names from the ELF directly. Reading them
from public globals was causing issues in cases where multiple
device-code object files are linked together.
Added:
Modified:
clang/lib/CodeGen/CoverageMappingGen.cpp
llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
offload/plugins-nextgen/common/include/GlobalHandler.h
offload/plugins-nextgen/common/src/GlobalHandler.cpp
offload/plugins-nextgen/common/src/PluginInterface.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index 1788bb4f28697..4aafac349e3e9 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -2622,8 +2622,9 @@ void CoverageMappingModuleGen::emit() {
CGM.addUsedGlobal(CovData);
// Create the deferred function records array
if (!FunctionNames.empty()) {
- auto NamesArrTy = llvm::ArrayType::get(llvm::PointerType::getUnqual(Ctx),
- FunctionNames.size());
+ auto AddrSpace = FunctionNames.front()->getType()->getPointerAddressSpace();
+ auto NamesArrTy = llvm::ArrayType::get(
+ llvm::PointerType::get(Ctx, AddrSpace), FunctionNames.size());
auto NamesArrVal = llvm::ConstantArray::get(NamesArrTy, FunctionNames);
// This variable will *NOT* be emitted to the object file. It is used
// to pass the list of names referenced to codegen.
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index fe3b0da33a009..5e7548b0a2fd1 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1955,12 +1955,6 @@ void InstrLowerer::emitNameData() {
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());
- // Make names variable public if current target is a GPU
- if (isGPUProfTarget(M)) {
- NamesVar->setLinkage(GlobalValue::ExternalLinkage);
- NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
- }
-
NamesSize = CompressedNameStr.size();
setGlobalVariableLargeSection(TT, *NamesVar);
NamesVar->setSection(
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index 6def53430a7c0..5d6109df49da5 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -80,6 +80,7 @@ struct GPUProfGlobals {
void dump() const;
Error write() const;
+ bool empty() const;
};
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
@@ -192,9 +193,6 @@ class GenericGlobalHandlerTy {
/*D2H=*/false);
}
- /// Checks whether a given image contains profiling globals.
- bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);
-
/// Reads profiling data from a GPU image to supplied profdata struct.
/// Iterates through the image symbol table and stores global values
/// with profiling prefixes.
diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
index 27d7e8ee2fdf3..5464c197dba78 100644
--- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -173,16 +173,6 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
return Plugin::success();
}
-bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
- DeviceImageTy &Image) {
- GlobalTy global(getInstrProfNamesVarName().str(), 0);
- if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) {
- consumeError(std::move(Err));
- return false;
- }
- return true;
-}
-
Expected<GPUProfGlobals>
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
@@ -204,12 +194,17 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
// Check if given current global is a profiling global based
// on name
if (*NameOrErr == getInstrProfNamesVarName()) {
- // Read in profiled function names
- DeviceProfileData.NamesData = SmallVector<uint8_t>(Sym.getSize(), 0);
- GlobalTy NamesGlobal(NameOrErr->str(), Sym.getSize(),
- DeviceProfileData.NamesData.data());
- if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
- return Err;
+ // Read in profiled function names from ELF
+ auto SectionOrErr = Sym.getSection();
+ if (!SectionOrErr)
+ return SectionOrErr.takeError();
+
+ auto ContentsOrErr = (*SectionOrErr)->getContents();
+ if (!ContentsOrErr)
+ return ContentsOrErr.takeError();
+
+ SmallVector<uint8_t> NameBytes(ContentsOrErr->bytes());
+ DeviceProfileData.NamesData = NameBytes;
} else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) {
// Read global variable profiling counts
SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0);
@@ -322,3 +317,7 @@ Error GPUProfGlobals::write() const {
return Plugin::success();
}
+
+bool GPUProfGlobals::empty() const {
+ return Counts.empty() && Data.empty() && NamesData.empty();
+}
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index f9e316adad8f4..f9a6b3c1f4324 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -858,14 +858,13 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
for (auto *Image : LoadedImages) {
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
- if (!Handler.hasProfilingGlobals(*this, *Image))
- continue;
-
- GPUProfGlobals profdata;
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
if (!ProfOrErr)
return ProfOrErr.takeError();
+ if (ProfOrErr->empty())
+ continue;
+
// Dump out profdata
if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
uint32_t(DeviceDebugKind::PGODump))
More information about the llvm-commits
mailing list