[clang] [compiler-rt] [llvm] [openmp] [PGO][Offload] Add GPU profiling flags to driver (PR #94268)
Ethan Luis McDonough via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 3 11:33:26 PDT 2024
https://github.com/EthanLuisMcDonough created https://github.com/llvm/llvm-project/pull/94268
This pull request is the third part of an ongoing effort to extends PGO instrumentation to GPU device code and depends on https://github.com/llvm/llvm-project/pull/93365. This PR makes the following changes:
- Adds the flags `-fprofile-generate-gpu`, `-fprofile-instr-generate-gpu`, and `-fprofile-use-gpu`
- Adds code to process the new flags
- Adds `PGODump` debug flag and only displays dump when the aforementioned flag is set
- Pulls version global from device
- Modifies `__llvm_write_custom_profile` and `lprofWriteDataImpl` to allow the PGO version to be overridden
>From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 15 Dec 2023 20:38:38 -0600
Subject: [PATCH 01/44] Add profiling functions to libomptarget
---
.../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++
openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++
.../DeviceRTL/include/Profiling.h | 21 +++++++++++++++++++
.../libomptarget/DeviceRTL/src/Profiling.cpp | 19 +++++++++++++++++
4 files changed, 45 insertions(+)
create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h
create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index d22d2a8e948b0..1d887d5cb5812 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
__OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
+__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr)
+__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64)
+
__OMP_RTL(__last, false, Void, )
#undef __OMP_RTL
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 1ce3e1e40a80a..55ee15d068c67 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -89,6 +89,7 @@ set(include_files
${include_directory}/Interface.h
${include_directory}/LibC.h
${include_directory}/Mapping.h
+ ${include_directory}/Profiling.h
${include_directory}/State.h
${include_directory}/Synchronization.h
${include_directory}/Types.h
@@ -104,6 +105,7 @@ set(src_files
${source_directory}/Mapping.cpp
${source_directory}/Misc.cpp
${source_directory}/Parallelism.cpp
+ ${source_directory}/Profiling.cpp
${source_directory}/Reduction.cpp
${source_directory}/State.cpp
${source_directory}/Synchronization.cpp
diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h
new file mode 100644
index 0000000000000..68c7744cd6075
--- /dev/null
+++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h
@@ -0,0 +1,21 @@
+//===-------- Profiling.h - OpenMP interface ---------------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPTARGET_DEVICERTL_PROFILING_H
+#define OMPTARGET_DEVICERTL_PROFILING_H
+
+extern "C" {
+
+void __llvm_profile_register_function(void *ptr);
+void __llvm_profile_register_names_function(void *ptr, long int i);
+}
+
+#endif
diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp
new file mode 100644
index 0000000000000..799477f5e47d2
--- /dev/null
+++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp
@@ -0,0 +1,19 @@
+//===------- Profiling.cpp ---------------------------------------- C++ ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Profiling.h"
+
+#pragma omp begin declare target device_type(nohost)
+
+extern "C" {
+
+void __llvm_profile_register_function(void *ptr) {}
+void __llvm_profile_register_names_function(void *ptr, long int i) {}
+}
+
+#pragma omp end declare target
>From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Sat, 16 Dec 2023 01:18:41 -0600
Subject: [PATCH 02/44] Fix PGO instrumentation for GPU targets
---
clang/lib/CodeGen/CodeGenPGO.cpp | 10 ++++++++--
.../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 ++++++++---
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 81bf8ea696b16..edae6885b528a 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
unsigned Counter = (*RegionCounterMap)[S];
- llvm::Value *Args[] = {FuncNameVar,
- Builder.getInt64(FunctionHash),
+ // Make sure that pointer to global is passed in with zero addrspace
+ // This is relevant during GPU profiling
+ auto *I8Ty = llvm::Type::getInt8Ty(CGM.getLLVMContext());
+ auto *I8PtrTy = llvm::PointerType::getUnqual(I8Ty);
+ auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ FuncNameVar, I8PtrTy);
+
+ llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
Builder.getInt32(Counter), StepV};
if (!StepV)
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index fe5a0578bd972..d2cb8155c1796 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1658,10 +1658,13 @@ void InstrLowerer::emitRegistration() {
IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
for (Value *Data : CompilerUsedVars)
if (!isa<Function>(Data))
- IRB.CreateCall(RuntimeRegisterF, Data);
+ // Check for addrspace cast when profiling GPU
+ IRB.CreateCall(RuntimeRegisterF,
+ IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
for (Value *Data : UsedVars)
if (Data != NamesVar && !isa<Function>(Data))
- IRB.CreateCall(RuntimeRegisterF, Data);
+ IRB.CreateCall(RuntimeRegisterF,
+ IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
if (NamesVar) {
Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
@@ -1670,7 +1673,9 @@ void InstrLowerer::emitRegistration() {
auto *NamesRegisterF =
Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
getInstrProfNamesRegFuncName(), M);
- IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
+ IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
+ NamesVar, VoidPtrTy),
+ IRB.getInt64(NamesSize)});
}
IRB.CreateRetVoid();
>From 7a0e0efa178cc4de6a22a8f5cc3f53cd1c81ea3a Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 21 Dec 2023 00:25:46 -0600
Subject: [PATCH 03/44] Change global visibility on GPU targets
---
llvm/include/llvm/ProfileData/InstrProf.h | 4 ++++
llvm/lib/ProfileData/InstrProf.cpp | 17 +++++++++++++++--
.../Instrumentation/InstrProfiling.cpp | 15 +++++++++++----
3 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 288dc71d756ae..bf9899d867e3d 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -171,6 +171,10 @@ inline StringRef getInstrProfCounterBiasVarName() {
/// Return the marker used to separate PGO names during serialization.
inline StringRef getInstrProfNameSeparator() { return "\01"; }
+/// Determines whether module targets a GPU eligable for PGO
+/// instrumentation
+bool isGPUProfTarget(const Module &M);
+
/// Return the modified name for function \c F suitable to be
/// used the key for profile lookup. Variable \c InLTO indicates if this
/// is called in LTO optimization passes.
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 649d814cfd9de..0d6717aeb0142 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -410,13 +410,22 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
return VarName;
}
+bool isGPUProfTarget(const Module &M) {
+ const auto &triple = M.getTargetTriple();
+ return triple.rfind("nvptx", 0) == 0 || triple.rfind("amdgcn", 0) == 0 ||
+ triple.rfind("r600", 0) == 0;
+}
+
GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef PGOFuncName) {
+ // Ensure profiling variables on GPU are visible to be read from host
+ if (isGPUProfTarget(M))
+ Linkage = GlobalValue::ExternalLinkage;
// We generally want to match the function's linkage, but available_externally
// and extern_weak both have the wrong semantics, and anything that doesn't
// need to link across compilation units doesn't need to be visible at all.
- if (Linkage == GlobalValue::ExternalWeakLinkage)
+ else if (Linkage == GlobalValue::ExternalWeakLinkage)
Linkage = GlobalValue::LinkOnceAnyLinkage;
else if (Linkage == GlobalValue::AvailableExternallyLinkage)
Linkage = GlobalValue::LinkOnceODRLinkage;
@@ -430,8 +439,12 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
new GlobalVariable(M, Value->getType(), true, Linkage, Value,
getPGOFuncNameVarName(PGOFuncName, Linkage));
+ // If the target is a GPU, make the symbol protected so it can
+ // be read from the host device
+ if (isGPUProfTarget(M))
+ FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
// Hide the symbol so that we correctly get a copy for each executable.
- if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
+ else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
return FuncNameVar;
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index d2cb8155c1796..3b582b6519080 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1481,6 +1481,10 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
+ if (isGPUProfTarget(M)) {
+ Linkage = GlobalValue::ExternalLinkage;
+ Visibility = GlobalValue::ProtectedVisibility;
+ }
// If the data variable is not referenced by code (if we don't emit
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
// data variable live under linker GC, the data variable can be private. This
@@ -1492,9 +1496,9 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
// If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
// that other copies must have the same CFG and cannot have value profiling.
// If no hash suffix, other profd copies may be referenced by code.
- if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
- (TT.isOSBinFormatELF() ||
- (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
+ else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
+ (TT.isOSBinFormatELF() ||
+ (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
Linkage = GlobalValue::PrivateLinkage;
Visibility = GlobalValue::DefaultVisibility;
}
@@ -1696,7 +1700,10 @@ bool InstrLowerer::emitRuntimeHook() {
auto *Var =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
- Var->setVisibility(GlobalValue::HiddenVisibility);
+ if (isGPUProfTarget(M))
+ Var->setVisibility(GlobalValue::ProtectedVisibility);
+ else
+ Var->setVisibility(GlobalValue::HiddenVisibility);
if (TT.isOSBinFormatELF() && !TT.isPS()) {
// Mark the user variable as used so that it isn't stripped out.
>From fddc07908ed9aa698fe3250ddbfc5621ab4d049d Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 22 Dec 2023 23:43:29 -0600
Subject: [PATCH 04/44] Make names global public on GPU
---
llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 3b582b6519080..61fba7be3ee0e 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1621,6 +1621,13 @@ void InstrLowerer::emitNameData() {
NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());
+
+ // Make names variable public if current target is a GPU
+ if (isGPUProfTarget(M)) {
+ NamesVar->setLinkage(GlobalValue::ExternalLinkage);
+ NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
+ }
+
NamesSize = CompressedNameStr.size();
setGlobalVariableLargeSection(TT, *NamesVar);
NamesVar->setSection(
>From e9db03c70bf79f4f4ddad4b48a5aa63a37e0d4f6 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 29 Dec 2023 12:54:50 -0600
Subject: [PATCH 05/44] Read and print GPU device PGO globals
---
.../common/include/GlobalHandler.h | 27 ++++++
.../common/src/GlobalHandler.cpp | 82 +++++++++++++++++++
.../common/src/PluginInterface.cpp | 14 ++++
3 files changed, 123 insertions(+)
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
index fa079ac9660ee..a82cd53648765 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
@@ -14,9 +14,11 @@
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#include <string>
+#include <vector>
#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "Shared/Debug.h"
#include "Shared/Utils.h"
@@ -58,6 +60,22 @@ class GlobalTy {
void setPtr(void *P) { Ptr = P; }
};
+typedef void *IntPtrT;
+struct __llvm_profile_data {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+/// PGO profiling data extracted from a GPU device
+struct GPUProfGlobals {
+ std::string names;
+ std::vector<std::vector<int64_t>> counts;
+ std::vector<__llvm_profile_data> data;
+ Triple targetTriple;
+
+ void dump() const;
+};
+
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
template <typename Ty> class StaticGlobalTy : public GlobalTy {
Ty Data;
@@ -172,6 +190,15 @@ class GenericGlobalHandlerTy {
return moveGlobalBetweenDeviceAndHost(Device, Image, HostGlobal,
/* D2H */ false);
}
+
+ /// Checks whether a given image contains profiling globals.
+ bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);
+
+ /// Reads profiling data from a GPU image to supplied profdata struct.
+ /// Iterates through the image symbol table and stores global values
+ /// with profiling prefixes.
+ Expected<GPUProfGlobals> readProfilingGlobals(GenericDeviceTy &Device,
+ DeviceImageTy &Image);
};
} // namespace plugin
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index 3a272e228c7df..5dd5daec468ca 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -176,3 +176,85 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
return Plugin::success();
}
+
+bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
+ DeviceImageTy &Image) {
+ GlobalTy global(getInstrProfNamesVarName().str(), 0);
+ if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) {
+ consumeError(std::move(Err));
+ return false;
+ }
+ return true;
+}
+
+Expected<GPUProfGlobals>
+GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
+ DeviceImageTy &Image) {
+ GPUProfGlobals profdata;
+ const auto *elf = getOrCreateELFObjectFile(Device, Image);
+ profdata.targetTriple = elf->makeTriple();
+ // Iterate through
+ for (auto &sym : elf->symbols()) {
+ if (auto name = sym.getName()) {
+ // Check if given current global is a profiling global based
+ // on name
+ if (name->equals(getInstrProfNamesVarName())) {
+ // Read in profiled function names
+ std::vector<char> chars(sym.getSize() / sizeof(char), ' ');
+ GlobalTy NamesGlobal(name->str(), sym.getSize(), chars.data());
+ if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
+ return Err;
+ std::string names(chars.begin(), chars.end());
+ profdata.names = std::move(names);
+ } else if (name->starts_with(getInstrProfCountersVarPrefix())) {
+ // Read global variable profiling counts
+ std::vector<int64_t> counts(sym.getSize() / sizeof(int64_t), 0);
+ GlobalTy CountGlobal(name->str(), sym.getSize(), counts.data());
+ if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
+ return Err;
+ profdata.counts.push_back(std::move(counts));
+ } else if (name->starts_with(getInstrProfDataVarPrefix())) {
+ // Read profiling data for this global variable
+ __llvm_profile_data data{};
+ GlobalTy DataGlobal(name->str(), sym.getSize(), &data);
+ if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
+ return Err;
+ profdata.data.push_back(std::move(data));
+ }
+ }
+ }
+ return profdata;
+}
+
+void GPUProfGlobals::dump() const {
+ llvm::outs() << "======= GPU Profile =======\nTarget: " << targetTriple.str()
+ << "\n";
+
+ llvm::outs() << "======== Counters =========\n";
+ for (const auto &count : counts) {
+ llvm::outs() << "[";
+ for (size_t i = 0; i < count.size(); i++) {
+ if (i == 0)
+ llvm::outs() << " ";
+ llvm::outs() << count[i] << " ";
+ }
+ llvm::outs() << "]\n";
+ }
+
+ llvm::outs() << "========== Data ===========\n";
+ for (const auto &d : data) {
+ llvm::outs() << "{ ";
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
+ llvm::outs() << d.Name << " ";
+#include "llvm/ProfileData/InstrProfData.inc"
+ llvm::outs() << " }\n";
+ }
+
+ llvm::outs() << "======== Functions ========\n";
+ InstrProfSymtab symtab;
+ if (Error Err = symtab.create(StringRef(names))) {
+ consumeError(std::move(Err));
+ }
+ symtab.dumpNames(llvm::outs());
+ llvm::outs() << "===========================\n";
+}
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 3c7d1ca899878..84ed90f03f84f 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -811,6 +811,20 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
DeviceMemoryPoolTracking.AllocationMax);
}
+ for (auto *Image : LoadedImages) {
+ GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
+ if (!Handler.hasProfilingGlobals(*this, *Image))
+ continue;
+
+ GPUProfGlobals profdata;
+ auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
+ if (!ProfOrErr)
+ return ProfOrErr.takeError();
+
+ // TODO: write data to profiling file
+ ProfOrErr->dump();
+ }
+
// Delete the memory manager before deinitializing the device. Otherwise,
// we may delete device allocations after the device is deinitialized.
if (MemoryManager)
>From e4687605d1a6ca932312025826db09dba84845a3 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 3 Jan 2024 17:06:15 -0600
Subject: [PATCH 06/44] Fix rebase bug
---
.../plugins-nextgen/common/src/GlobalHandler.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index cb71b61f4a9c4..86742d0f77a2f 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -178,10 +178,12 @@ Expected<GPUProfGlobals>
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
GPUProfGlobals profdata;
- const auto *elf = getOrCreateELFObjectFile(Device, Image);
- profdata.targetTriple = elf->makeTriple();
- // Iterate through
- for (auto &sym : elf->symbols()) {
+ auto ELFObj = getELFObjectFile(Image);
+ if (!ELFObj)
+ return ELFObj.takeError();
+ profdata.targetTriple = ELFObj->makeTriple();
+ // Iterate through elf symbols
+ for (auto &sym : ELFObj->symbols()) {
if (auto name = sym.getName()) {
// Check if given current global is a profiling global based
// on name
>From ec18ce94c227e1d43927955fa1c67360ecfcfca6 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 3 Jan 2024 17:10:19 -0600
Subject: [PATCH 07/44] Refactor portions to be more idiomatic
---
clang/lib/CodeGen/CodeGenPGO.cpp | 4 +---
llvm/lib/ProfileData/InstrProf.cpp | 5 ++---
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index edae6885b528a..7bfcec43ee4c9 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -961,10 +961,8 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *I8Ty = llvm::Type::getInt8Ty(CGM.getLLVMContext());
- auto *I8PtrTy = llvm::PointerType::getUnqual(I8Ty);
auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- FuncNameVar, I8PtrTy);
+ FuncNameVar, llvm::PointerType::getUnqual(CGM.getLLVMContext()));
llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index cdcd6840bb510..1d88da16a5ff9 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -429,9 +429,8 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
}
bool isGPUProfTarget(const Module &M) {
- const auto &triple = M.getTargetTriple();
- return triple.rfind("nvptx", 0) == 0 || triple.rfind("amdgcn", 0) == 0 ||
- triple.rfind("r600", 0) == 0;
+ const auto &Triple = llvm::Triple(M.getTargetTriple());
+ return Triple.isAMDGPU() || Triple.isNVPTX();
}
GlobalVariable *createPGOFuncNameVar(Module &M,
>From 0872556f597056361b0a2c23cdd0be3d9745aef3 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 3 Jan 2024 17:18:47 -0600
Subject: [PATCH 08/44] Reformat DeviceRTL prof functions
---
openmp/libomptarget/DeviceRTL/include/Profiling.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h
index 68c7744cd6075..9efc1554c176b 100644
--- a/openmp/libomptarget/DeviceRTL/include/Profiling.h
+++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h
@@ -13,9 +13,8 @@
#define OMPTARGET_DEVICERTL_PROFILING_H
extern "C" {
-
-void __llvm_profile_register_function(void *ptr);
-void __llvm_profile_register_names_function(void *ptr, long int i);
+void __llvm_profile_register_function(void *Ptr);
+void __llvm_profile_register_names_function(void *Ptr, long int I);
}
#endif
>From 62f31d1c71b5d100f38d6dc584cc138b3904581b Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 9 Jan 2024 11:52:29 -0600
Subject: [PATCH 09/44] Style changes + catch name error
---
.../common/include/GlobalHandler.h | 16 ++--
.../common/src/GlobalHandler.cpp | 87 ++++++++++---------
2 files changed, 56 insertions(+), 47 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
index a803b3f76d8b2..755bb23a414e3 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
@@ -13,8 +13,7 @@
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
-#include <string>
-#include <vector>
+#include <type_traits>
#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -60,18 +59,19 @@ class GlobalTy {
void setPtr(void *P) { Ptr = P; }
};
-typedef void *IntPtrT;
+using IntPtrT = void *;
struct __llvm_profile_data {
-#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
+ std::remove_const<Type>::type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
/// PGO profiling data extracted from a GPU device
struct GPUProfGlobals {
- std::string names;
- std::vector<std::vector<int64_t>> counts;
- std::vector<__llvm_profile_data> data;
- Triple targetTriple;
+ SmallVector<uint8_t> NamesData;
+ SmallVector<SmallVector<int64_t>> Counts;
+ SmallVector<__llvm_profile_data> Data;
+ Triple TargetTriple;
void dump() const;
};
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index 86742d0f77a2f..7cb672e7b2683 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Error.h"
#include <cstring>
+#include <string>
using namespace llvm;
using namespace omp;
@@ -177,73 +178,81 @@ bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
Expected<GPUProfGlobals>
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
- GPUProfGlobals profdata;
+ GPUProfGlobals DeviceProfileData;
auto ELFObj = getELFObjectFile(Image);
if (!ELFObj)
return ELFObj.takeError();
- profdata.targetTriple = ELFObj->makeTriple();
+ DeviceProfileData.TargetTriple = ELFObj->makeTriple();
+
// Iterate through elf symbols
- for (auto &sym : ELFObj->symbols()) {
- if (auto name = sym.getName()) {
- // Check if given current global is a profiling global based
- // on name
- if (name->equals(getInstrProfNamesVarName())) {
- // Read in profiled function names
- std::vector<char> chars(sym.getSize() / sizeof(char), ' ');
- GlobalTy NamesGlobal(name->str(), sym.getSize(), chars.data());
- if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
- return Err;
- std::string names(chars.begin(), chars.end());
- profdata.names = std::move(names);
- } else if (name->starts_with(getInstrProfCountersVarPrefix())) {
- // Read global variable profiling counts
- std::vector<int64_t> counts(sym.getSize() / sizeof(int64_t), 0);
- GlobalTy CountGlobal(name->str(), sym.getSize(), counts.data());
- if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
- return Err;
- profdata.counts.push_back(std::move(counts));
- } else if (name->starts_with(getInstrProfDataVarPrefix())) {
- // Read profiling data for this global variable
- __llvm_profile_data data{};
- GlobalTy DataGlobal(name->str(), sym.getSize(), &data);
- if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
- return Err;
- profdata.data.push_back(std::move(data));
- }
+ for (auto &Sym : ELFObj->symbols()) {
+ auto NameOrErr = Sym.getName();
+ if (!NameOrErr)
+ return ELFObj.takeError();
+
+ // Check if given current global is a profiling global based
+ // on name
+ if (NameOrErr->equals(getInstrProfNamesVarName())) {
+ // Read in profiled function names
+ DeviceProfileData.NamesData = SmallVector<uint8_t>(Sym.getSize(), 0);
+ GlobalTy NamesGlobal(NameOrErr->str(), Sym.getSize(),
+ DeviceProfileData.NamesData.data());
+ if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
+ return Err;
+ } else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) {
+ // Read global variable profiling counts
+ SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0);
+ GlobalTy CountGlobal(NameOrErr->str(), Sym.getSize(), Counts.data());
+ if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
+ return Err;
+ DeviceProfileData.Counts.push_back(std::move(Counts));
+ } else if (NameOrErr->starts_with(getInstrProfDataVarPrefix())) {
+ // Read profiling data for this global variable
+ __llvm_profile_data Data{};
+ GlobalTy DataGlobal(NameOrErr->str(), Sym.getSize(), &Data);
+ if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
+ return Err;
+ DeviceProfileData.Data.push_back(std::move(Data));
}
}
- return profdata;
+ return DeviceProfileData;
}
void GPUProfGlobals::dump() const {
- llvm::outs() << "======= GPU Profile =======\nTarget: " << targetTriple.str()
+ llvm::outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
<< "\n";
llvm::outs() << "======== Counters =========\n";
- for (const auto &count : counts) {
+ for (const auto &Count : Counts) {
llvm::outs() << "[";
- for (size_t i = 0; i < count.size(); i++) {
+ for (size_t i = 0; i < Count.size(); i++) {
if (i == 0)
llvm::outs() << " ";
- llvm::outs() << count[i] << " ";
+ llvm::outs() << Count[i] << " ";
}
llvm::outs() << "]\n";
}
llvm::outs() << "========== Data ===========\n";
- for (const auto &d : data) {
+ for (const auto &ProfData : Data) {
llvm::outs() << "{ ";
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
- llvm::outs() << d.Name << " ";
+ llvm::outs() << ProfData.Name << " ";
#include "llvm/ProfileData/InstrProfData.inc"
llvm::outs() << " }\n";
}
llvm::outs() << "======== Functions ========\n";
- InstrProfSymtab symtab;
- if (Error Err = symtab.create(StringRef(names))) {
+ std::string s;
+ s.reserve(NamesData.size());
+ for (uint8_t Name : NamesData) {
+ s.push_back((char)Name);
+ }
+
+ InstrProfSymtab Symtab;
+ if (Error Err = Symtab.create(StringRef(s))) {
consumeError(std::move(Err));
}
- symtab.dumpNames(llvm::outs());
+ Symtab.dumpNames(llvm::outs());
llvm::outs() << "===========================\n";
}
>From 0c4bbeb54d189c1461affd37853aa86c3e3ca7d8 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 17 Jan 2024 19:59:06 -0600
Subject: [PATCH 10/44] Add GPU PGO test
---
.../common/src/GlobalHandler.cpp | 2 +-
openmp/libomptarget/test/CMakeLists.txt | 6 +++
openmp/libomptarget/test/lit.cfg | 3 ++
openmp/libomptarget/test/lit.site.cfg.in | 2 +-
openmp/libomptarget/test/offloading/pgo1.c | 39 +++++++++++++++++++
5 files changed, 50 insertions(+), 2 deletions(-)
create mode 100644 openmp/libomptarget/test/offloading/pgo1.c
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index 7cb672e7b2683..e5eb653d02228 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -239,7 +239,7 @@ void GPUProfGlobals::dump() const {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
llvm::outs() << ProfData.Name << " ";
#include "llvm/ProfileData/InstrProfData.inc"
- llvm::outs() << " }\n";
+ llvm::outs() << "}\n";
}
llvm::outs() << "======== Functions ========\n";
diff --git a/openmp/libomptarget/test/CMakeLists.txt b/openmp/libomptarget/test/CMakeLists.txt
index a0ba233eaa572..21233f3e252eb 100644
--- a/openmp/libomptarget/test/CMakeLists.txt
+++ b/openmp/libomptarget/test/CMakeLists.txt
@@ -12,6 +12,12 @@ else()
set(LIBOMPTARGET_DEBUG False)
endif()
+if (OPENMP_STANDALONE_BUILD)
+ set(LIBOMPTARGET_TEST_GPU_PGO False)
+else()
+ set(LIBOMPTARGET_TEST_GPU_PGO True)
+endif()
+
# Replace the space from user's input with ";" in case that CMake add escape
# char into the lit command.
string(REPLACE " " ";" LIBOMPTARGET_LIT_ARG_LIST "${LIBOMPTARGET_LIT_ARGS}")
diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg
index 19c5e5c457222..49743f9fed7f2 100644
--- a/openmp/libomptarget/test/lit.cfg
+++ b/openmp/libomptarget/test/lit.cfg
@@ -104,6 +104,9 @@ config.available_features.add(config.libomptarget_current_target)
if config.libomptarget_has_libc:
config.available_features.add('libc')
+if config.libomptarget_test_pgo:
+ config.available_features.add('pgo')
+
# Determine whether the test system supports unified memory.
# For CUDA, this is the case with compute capability 70 (Volta) or higher.
# For all other targets, we currently assume it is.
diff --git a/openmp/libomptarget/test/lit.site.cfg.in b/openmp/libomptarget/test/lit.site.cfg.in
index 2d63811883872..494d1636af304 100644
--- a/openmp/libomptarget/test/lit.site.cfg.in
+++ b/openmp/libomptarget/test/lit.site.cfg.in
@@ -25,6 +25,6 @@ config.libomptarget_not = "@OPENMP_NOT_EXECUTABLE@"
config.libomptarget_debug = @LIBOMPTARGET_DEBUG@
config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@
config.libomptarget_has_libc = @LIBOMPTARGET_GPU_LIBC_SUPPORT@
-
+config.libomptarget_test_pgo = @LIBOMPTARGET_TEST_GPU_PGO@
# Let the main config do the real work.
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c
new file mode 100644
index 0000000000000..ca8a6f502a06a
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/pgo1.c
@@ -0,0 +1,39 @@
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate -Xclang "-fprofile-instrument=clang"
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// REQUIRES: pgo
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+int test1(int a) { return a / 2; }
+int test2(int a) { return a * 2; }
+
+int main() {
+ int m = 2;
+#pragma omp target
+ for (int i = 0; i < 10; i++) {
+ m = test1(m);
+ for (int j = 0; j < 2; j++) {
+ m = test2(m);
+ }
+ }
+}
+
+// CHECK: ======== Counters =========
+// CHECK-NEXT: [ 0 11 20 ]
+// CHECK-NEXT: [ 10 ]
+// CHECK-NEXT: [ 20 ]
+// CHECK-NEXT: ========== Data ===========
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: ======== Functions ========
+// CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
+// CHECK-NEXT: test1
+// CHECK-NEXT: test2
>From c7ae2a74daa93b05058fcc9bba64e0734359362c Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 17 Jan 2024 23:12:27 -0600
Subject: [PATCH 11/44] Fix PGO test formatting
---
openmp/libomptarget/test/offloading/pgo1.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c
index ca8a6f502a06a..389be19b670d7 100644
--- a/openmp/libomptarget/test/offloading/pgo1.c
+++ b/openmp/libomptarget/test/offloading/pgo1.c
@@ -1,4 +1,5 @@
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate -Xclang "-fprofile-instrument=clang"
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
+// RUN: -Xclang "-fprofile-instrument=clang"
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
// UNSUPPORTED: x86_64-pc-linux-gnu
@@ -30,9 +31,18 @@ int main() {
// CHECK-NEXT: [ 10 ]
// CHECK-NEXT: [ 20 ]
// CHECK-NEXT: ========== Data ===========
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
// CHECK-NEXT: ======== Functions ========
// CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
// CHECK-NEXT: test1
>From 8bb22072914bbb830e2788d117aedd0e0bab66ff Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 18 Jan 2024 23:15:55 -0600
Subject: [PATCH 12/44] Refactor visibility logic
---
llvm/lib/ProfileData/InstrProf.cpp | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 511571a3eed9b..708ea63fd95e0 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -422,6 +422,16 @@ bool isGPUProfTarget(const Module &M) {
return Triple.isAMDGPU() || Triple.isNVPTX();
}
+void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) {
+ // If the target is a GPU, make the symbol protected so it can
+ // be read from the host device
+ if (isGPUProfTarget(M))
+ FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
+ // Hide the symbol so that we correctly get a copy for each executable.
+ else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
+ FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
+}
+
GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef PGOFuncName) {
@@ -445,14 +455,7 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
new GlobalVariable(M, Value->getType(), true, Linkage, Value,
getPGOFuncNameVarName(PGOFuncName, Linkage));
- // If the target is a GPU, make the symbol protected so it can
- // be read from the host device
- if (isGPUProfTarget(M))
- FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
- // Hide the symbol so that we correctly get a copy for each executable.
- else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
- FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
-
+ setPGOFuncVisibility(M, FuncNameVar);
return FuncNameVar;
}
>From 9f13943f64cb16162e44902d54de53a9b1229179 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 23 Jan 2024 18:33:58 -0600
Subject: [PATCH 13/44] Add LLVM instrumentation support
This PR formerly only supported -fprofile-instrument=clang. This commit adds support for -fprofile-instrument=llvm
---
.../Instrumentation/PGOInstrumentation.cpp | 12 +++-
openmp/libomptarget/test/offloading/pgo1.c | 72 +++++++++++++------
2 files changed, 59 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c20fc942eaf0d..bbc8da78fd7ba 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -862,6 +862,10 @@ static void instrumentOneFunc(
auto Name = FuncInfo.FuncNameVar;
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
+ // Make sure that pointer to global is passed in with zero addrspace
+ // This is relevant during GPU profiling
+ auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ Name, llvm::PointerType::getUnqual(M->getContext()));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
@@ -869,7 +873,7 @@ static void instrumentOneFunc(
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
- {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
+ {NormalizedPtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
return;
}
@@ -887,7 +891,8 @@ static void instrumentOneFunc(
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
- {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
+ {NormalizedPtr, CFGHash, Builder.getInt32(NumCounters),
+ Builder.getInt32(I)});
I += PGOBlockCoverage ? 8 : 1;
}
@@ -901,7 +906,8 @@ static void instrumentOneFunc(
Intrinsic::getDeclaration(M, PGOBlockCoverage
? Intrinsic::instrprof_cover
: Intrinsic::instrprof_increment),
- {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
+ {NormalizedPtr, CFGHash, Builder.getInt32(NumCounters),
+ Builder.getInt32(I++)});
}
// Now instrument select instructions:
diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c
index 389be19b670d7..d95793b508dcf 100644
--- a/openmp/libomptarget/test/offloading/pgo1.c
+++ b/openmp/libomptarget/test/offloading/pgo1.c
@@ -1,6 +1,11 @@
// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
// RUN: -Xclang "-fprofile-instrument=clang"
-// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
+// RUN: --check-prefix="CLANG-PGO"
+// RUN: %libomptarget-compile-generic -fprofile-generate \
+// RUN: -Xclang "-fprofile-instrument=llvm"
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
+// RUN: --check-prefix="LLVM-PGO"
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
@@ -26,24 +31,47 @@ int main() {
}
}
-// CHECK: ======== Counters =========
-// CHECK-NEXT: [ 0 11 20 ]
-// CHECK-NEXT: [ 10 ]
-// CHECK-NEXT: [ 20 ]
-// CHECK-NEXT: ========== Data ===========
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: ======== Functions ========
-// CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
-// CHECK-NEXT: test1
-// CHECK-NEXT: test2
+// CLANG-PGO: ======== Counters =========
+// CLANG-PGO-NEXT: [ 0 11 20 ]
+// CLANG-PGO-NEXT: [ 10 ]
+// CLANG-PGO-NEXT: [ 20 ]
+// CLANG-PGO-NEXT: ========== Data ===========
+// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CLANG-PGO-NEXT: ======== Functions ========
+// CLANG-PGO-NEXT: pgo1.c:
+// CLANG-PGO-SAME: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
+// CLANG-PGO-NEXT: test1
+// CLANG-PGO-NEXT: test2
+
+// LLVM-PGO: ======== Counters =========
+// LLVM-PGO-NEXT: [ 20 ]
+// LLVM-PGO-NEXT: [ 10 ]
+// LLVM-PGO-NEXT: [ 20 10 1 1 ]
+// LLVM-PGO-NEXT: ========== Data ===========
+// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// LLVM-PGO-NEXT: ======== Functions ========
+// LLVM-PGO-NEXT: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
+// LLVM-PGO-NEXT: test1
+// LLVM-PGO-NEXT: test2
>From 0606f0dd1b32ef9ebe138bbc964b3921e22d95d1 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 14 Feb 2024 01:46:55 -0600
Subject: [PATCH 14/44] Use explicit addrspace instead of unqual
---
clang/lib/CodeGen/CodeGenPGO.cpp | 2 +-
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index e084dda879cbc..4c75a01222d30 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1103,7 +1103,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- FuncNameVar, llvm::PointerType::getUnqual(CGM.getLLVMContext()));
+ FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index bbc8da78fd7ba..c63b3e4ecf786 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -865,7 +865,7 @@ static void instrumentOneFunc(
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- Name, llvm::PointerType::getUnqual(M->getContext()));
+ Name, llvm::PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
>From c1f9be321678766525141214aaab74636cafbc2c Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 15 Feb 2024 19:10:09 -0600
Subject: [PATCH 15/44] Remove redundant namespaces
---
.../Instrumentation/PGOInstrumentation.cpp | 4 +--
.../common/src/GlobalHandler.cpp | 26 +++++++++----------
2 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c63b3e4ecf786..3058e577738fd 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -864,8 +864,8 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- Name, llvm::PointerType::get(M->getContext(), 0));
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ Name, PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index e5eb653d02228..ae270c60804d2 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -219,30 +219,30 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
}
void GPUProfGlobals::dump() const {
- llvm::outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
+ outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
<< "\n";
- llvm::outs() << "======== Counters =========\n";
+ outs() << "======== Counters =========\n";
for (const auto &Count : Counts) {
- llvm::outs() << "[";
+ outs() << "[";
for (size_t i = 0; i < Count.size(); i++) {
if (i == 0)
- llvm::outs() << " ";
- llvm::outs() << Count[i] << " ";
+ outs() << " ";
+ outs() << Count[i] << " ";
}
- llvm::outs() << "]\n";
+ outs() << "]\n";
}
- llvm::outs() << "========== Data ===========\n";
+ outs() << "========== Data ===========\n";
for (const auto &ProfData : Data) {
- llvm::outs() << "{ ";
+ outs() << "{ ";
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
- llvm::outs() << ProfData.Name << " ";
+ outs() << ProfData.Name << " ";
#include "llvm/ProfileData/InstrProfData.inc"
- llvm::outs() << "}\n";
+ outs() << "}\n";
}
- llvm::outs() << "======== Functions ========\n";
+ outs() << "======== Functions ========\n";
std::string s;
s.reserve(NamesData.size());
for (uint8_t Name : NamesData) {
@@ -253,6 +253,6 @@ void GPUProfGlobals::dump() const {
if (Error Err = Symtab.create(StringRef(s))) {
consumeError(std::move(Err));
}
- Symtab.dumpNames(llvm::outs());
- llvm::outs() << "===========================\n";
+ Symtab.dumpNames(outs());
+ outs() << "===========================\n";
}
>From 6a3ae407e69e7524f0f808329c534f8352ee1779 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 15 Feb 2024 19:15:15 -0600
Subject: [PATCH 16/44] Clang format
---
.../libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index ae270c60804d2..1fce244892262 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -220,7 +220,7 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
void GPUProfGlobals::dump() const {
outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
- << "\n";
+ << "\n";
outs() << "======== Counters =========\n";
for (const auto &Count : Counts) {
>From 6866862d459e3c3fa65fae8ae639ddc3ff735252 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 16 Feb 2024 13:13:39 -0600
Subject: [PATCH 17/44] Use getAddrSpaceCast
Replace getPointerBitCastOrAddrSpaceCast with getAddrSpaceCast and allow no-op getAddrSpaceCast calls when types are identical
---
clang/lib/CodeGen/CodeGenPGO.cpp | 2 +-
llvm/lib/IR/Constants.cpp | 4 ++++
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +-
3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 8f52018445d2b..baceeba8380dd 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1099,7 +1099,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ auto *NormalizedPtr = llvm::ConstantExpr::getAddrSpaceCast(
FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index a38b912164b13..2d89c5bbd4a4c 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2067,6 +2067,10 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy,
Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
+ // Skip cast if types are identical
+ if (C->getType() == DstTy)
+ return C;
+
assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) &&
"Invalid constantexpr addrspacecast!");
return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3058e577738fd..c0be71aa4cc00 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -864,7 +864,7 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ auto *NormalizedPtr = ConstantExpr::getAddrSpaceCast(
Name, PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
>From 62a5ee1c75545571f81d9edd22e19e9ef7cff69f Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 27 Feb 2024 14:53:51 -0600
Subject: [PATCH 18/44] Revert "Use getAddrSpaceCast"
This reverts commit 6866862d459e3c3fa65fae8ae639ddc3ff735252.
---
clang/lib/CodeGen/CodeGenPGO.cpp | 2 +-
llvm/lib/IR/Constants.cpp | 4 ----
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +-
3 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index baceeba8380dd..8f52018445d2b 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1099,7 +1099,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = llvm::ConstantExpr::getAddrSpaceCast(
+ auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 2d89c5bbd4a4c..a38b912164b13 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2067,10 +2067,6 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy,
Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
- // Skip cast if types are identical
- if (C->getType() == DstTy)
- return C;
-
assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) &&
"Invalid constantexpr addrspacecast!");
return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c0be71aa4cc00..3058e577738fd 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -864,7 +864,7 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = ConstantExpr::getAddrSpaceCast(
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
Name, PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
>From 052394fa28c923d130bf73a07b965a9751467302 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 27 Feb 2024 15:34:34 -0600
Subject: [PATCH 19/44] Revert "Use getAddrSpaceCast"
This reverts commit 6866862d459e3c3fa65fae8ae639ddc3ff735252.
---
clang/lib/CodeGen/CodeGenPGO.cpp | 2 +-
llvm/lib/IR/Constants.cpp | 4 ----
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +-
3 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index baceeba8380dd..8f52018445d2b 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1099,7 +1099,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = llvm::ConstantExpr::getAddrSpaceCast(
+ auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 2d89c5bbd4a4c..a38b912164b13 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2067,10 +2067,6 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy,
Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
- // Skip cast if types are identical
- if (C->getType() == DstTy)
- return C;
-
assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) &&
"Invalid constantexpr addrspacecast!");
return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c0be71aa4cc00..3058e577738fd 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -864,7 +864,7 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = ConstantExpr::getAddrSpaceCast(
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
Name, PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
>From 612d5a5f6966a77e82e5591f5aea475fbf886e55 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 1 Mar 2024 02:04:00 -0600
Subject: [PATCH 20/44] Write PGO
TODO: Fix tests
---
compiler-rt/lib/profile/InstrProfiling.h | 11 ++
compiler-rt/lib/profile/InstrProfilingFile.c | 148 +++++++++++++++---
.../common/include/GlobalHandler.h | 14 +-
.../common/src/GlobalHandler.cpp | 57 +++++--
.../common/src/PluginInterface.cpp | 6 +-
5 files changed, 200 insertions(+), 36 deletions(-)
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index 0123908336918..937acbd417de4 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -275,6 +275,17 @@ void __llvm_profile_get_padding_sizes_for_counters(
*/
void __llvm_profile_set_dumped();
+/*!
+ * \brief Write custom target-specific profiling data to a seperate file.
+ * Used by libomptarget for GPU PGO.
+ */
+int __llvm_write_custom_profile(const char *Target,
+ const __llvm_profile_data *DataBegin,
+ const __llvm_profile_data *DataEnd,
+ const char *CountersBegin,
+ const char *CountersEnd, const char *NamesBegin,
+ const char *NamesEnd);
+
/*!
* This variable is defined in InstrProfilingRuntime.cpp as a hidden
* symbol. Its main purpose is to enable profile runtime user to
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index f3b457d786e6b..4fc401bb9bebf 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -502,27 +502,15 @@ static FILE *getFileObject(const char *OutputName) {
return fopen(OutputName, "ab");
}
-/* Write profile data to file \c OutputName. */
-static int writeFile(const char *OutputName) {
- int RetVal;
- FILE *OutputFile;
-
- int MergeDone = 0;
+/* Get file object and merge if applicable */
+static FILE *getMergeFileObject(const char *OutputName, int *MergeDone) {
VPMergeHook = &lprofMergeValueProfData;
if (doMerging())
- OutputFile = openFileForMerging(OutputName, &MergeDone);
- else
- OutputFile = getFileObject(OutputName);
-
- if (!OutputFile)
- return -1;
-
- FreeHook = &free;
- setupIOBuffer();
- ProfDataWriter fileWriter;
- initFileWriter(&fileWriter, OutputFile);
- RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone);
+ return openFileForMerging(OutputName, MergeDone);
+ return getFileObject(OutputName);
+}
+static void closeFileObject(FILE *OutputFile) {
if (OutputFile == getProfileFile()) {
fflush(OutputFile);
if (doMerging() && !__llvm_profile_is_continuous_mode_enabled()) {
@@ -531,7 +519,23 @@ static int writeFile(const char *OutputName) {
} else {
fclose(OutputFile);
}
+}
+
+/* Write profile data to file \c OutputName. */
+static int writeFile(const char *OutputName) {
+ int RetVal, MergeDone = 0;
+ FILE *OutputFile = getMergeFileObject(OutputName, &MergeDone);
+
+ if (!OutputFile)
+ return -1;
+
+ FreeHook = &free;
+ setupIOBuffer();
+ ProfDataWriter fileWriter;
+ initFileWriter(&fileWriter, OutputFile);
+ RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone);
+ closeFileObject(OutputFile);
return RetVal;
}
@@ -558,10 +562,16 @@ static int writeOrderFile(const char *OutputName) {
#define LPROF_INIT_ONCE_ENV "__LLVM_PROFILE_RT_INIT_ONCE"
+static void forceTruncateFile(const char *Filename) {
+ FILE *File = fopen(Filename, "w");
+ if (!File)
+ return;
+ fclose(File);
+}
+
static void truncateCurrentFile(void) {
const char *Filename;
char *FilenameBuf;
- FILE *File;
int Length;
Length = getCurFilenameLength();
@@ -591,10 +601,7 @@ static void truncateCurrentFile(void) {
return;
/* Truncate the file. Later we'll reopen and append. */
- File = fopen(Filename, "w");
- if (!File)
- return;
- fclose(File);
+ forceTruncateFile(Filename);
}
/* Write a partial profile to \p Filename, which is required to be backed by
@@ -1271,4 +1278,99 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
return 0;
}
+int __llvm_write_custom_profile(const char *Target,
+ const __llvm_profile_data *DataBegin,
+ const __llvm_profile_data *DataEnd,
+ const char *CountersBegin,
+ const char *CountersEnd, const char *NamesBegin,
+ const char *NamesEnd) {
+ int ReturnValue = 0, FilenameLength, TargetLength, MergeDone;
+ char *FilenameBuf, *TargetFilename;
+ const char *Filename;
+
+ /* Save old profile data */
+ FILE *oldFile = getProfileFile();
+
+ // Temporarily suspend getting SIGKILL when the parent exits.
+ int PDeathSig = lprofSuspendSigKill();
+
+ if (lprofProfileDumped() || __llvm_profile_is_continuous_mode_enabled()) {
+ PROF_NOTE("Profile data not written to file: %s.\n", "already written");
+ if (PDeathSig == 1)
+ lprofRestoreSigKill();
+ return 0;
+ }
+
+ /* Get current filename */
+ FilenameLength = getCurFilenameLength();
+ FilenameBuf = (char *)COMPILER_RT_ALLOCA(FilenameLength + 1);
+ Filename = getCurFilename(FilenameBuf, 0);
+
+ /* Check the filename. */
+ if (!Filename) {
+ PROF_ERR("Failed to write file : %s\n", "Filename not set");
+ if (PDeathSig == 1)
+ lprofRestoreSigKill();
+ return -1;
+ }
+
+ /* Allocate new space for our target-specific PGO filename */
+ TargetLength = strlen(Target);
+ TargetFilename =
+ (char *)COMPILER_RT_ALLOCA(FilenameLength + TargetLength + 2);
+
+ /* Prepend "TARGET." to current filename */
+ memcpy(TargetFilename, Target, TargetLength);
+ TargetFilename[TargetLength] = '.';
+ memcpy(TargetFilename, Target, TargetLength);
+ memcpy(TargetFilename + 1 + TargetLength, Filename, FilenameLength);
+ TargetFilename[FilenameLength + 1 + TargetLength] = 0;
+
+ /* Check if there is llvm/runtime version mismatch. */
+ if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
+ PROF_ERR("Runtime and instrumentation version mismatch : "
+ "expected %d, but get %d\n",
+ INSTR_PROF_RAW_VERSION,
+ (int)GET_VERSION(__llvm_profile_get_version()));
+ if (PDeathSig == 1)
+ lprofRestoreSigKill();
+ return -1;
+ }
+
+ /* Clean old target file */
+ forceTruncateFile(TargetFilename);
+
+ /* Open target-specific PGO file */
+ MergeDone = 0;
+ FILE *OutputFile = getMergeFileObject(TargetFilename, &MergeDone);
+
+ if (!OutputFile) {
+ PROF_ERR("Failed to open file : %s\n", TargetFilename);
+ if (PDeathSig == 1)
+ lprofRestoreSigKill();
+ return -1;
+ }
+
+ FreeHook = &free;
+ setupIOBuffer();
+ ProfDataWriter fileWriter;
+ initFileWriter(&fileWriter, OutputFile);
+
+ /* Write custom data to the file */
+ ReturnValue = lprofWriteDataImpl(
+ &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
+ lprofGetVPDataReader(), NamesBegin, NamesEnd, MergeDone);
+
+ closeFileObject(OutputFile);
+
+ // Restore SIGKILL.
+ if (PDeathSig == 1)
+ lprofRestoreSigKill();
+
+ /* Restore old profiling file */
+ setProfileFile(oldFile);
+
+ return ReturnValue;
+}
+
#endif
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
index f5a15ca11bfcd..af0cd4dcdf5dc 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
@@ -63,14 +63,24 @@ struct __llvm_profile_data {
#include "llvm/ProfileData/InstrProfData.inc"
};
+extern "C" {
+extern int __attribute__((weak))
+__llvm_write_custom_profile(const char *Target,
+ const __llvm_profile_data *DataBegin,
+ const __llvm_profile_data *DataEnd,
+ const char *CountersBegin, const char *CountersEnd,
+ const char *NamesBegin, const char *NamesEnd);
+}
+
/// PGO profiling data extracted from a GPU device
struct GPUProfGlobals {
- SmallVector<uint8_t> NamesData;
- SmallVector<SmallVector<int64_t>> Counts;
+ SmallVector<int64_t> Counts;
SmallVector<__llvm_profile_data> Data;
+ SmallVector<uint8_t> NamesData;
Triple TargetTriple;
void dump() const;
+ Error write() const;
};
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index 1fce244892262..2f16b6e3c139e 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -205,7 +205,7 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
GlobalTy CountGlobal(NameOrErr->str(), Sym.getSize(), Counts.data());
if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
return Err;
- DeviceProfileData.Counts.push_back(std::move(Counts));
+ DeviceProfileData.Counts.append(std::move(Counts));
} else if (NameOrErr->starts_with(getInstrProfDataVarPrefix())) {
// Read profiling data for this global variable
__llvm_profile_data Data{};
@@ -223,15 +223,14 @@ void GPUProfGlobals::dump() const {
<< "\n";
outs() << "======== Counters =========\n";
- for (const auto &Count : Counts) {
- outs() << "[";
- for (size_t i = 0; i < Count.size(); i++) {
- if (i == 0)
- outs() << " ";
- outs() << Count[i] << " ";
- }
- outs() << "]\n";
+ for (size_t i = 0; i < Counts.size(); i++) {
+ if (i > 0 && i % 10 == 0)
+ outs() << "\n";
+ else if (i != 0)
+ outs() << " ";
+ outs() << Counts[i];
}
+ outs() << "\n";
outs() << "========== Data ===========\n";
for (const auto &ProfData : Data) {
@@ -256,3 +255,43 @@ void GPUProfGlobals::dump() const {
Symtab.dumpNames(outs());
outs() << "===========================\n";
}
+
+Error GPUProfGlobals::write() const {
+ if (!__llvm_write_custom_profile)
+ return Plugin::error("Could not find symbol __llvm_write_custom_profile. "
+ "The compiler-rt profiling library must be linked for "
+ "GPU PGO to work.");
+
+ size_t DataSize = Data.size() * sizeof(__llvm_profile_data),
+ CountsSize = Counts.size() * sizeof(int64_t);
+ __llvm_profile_data *DataBegin, *DataEnd;
+ char *CountersBegin, *CountersEnd, *NamesBegin, *NamesEnd;
+
+ // Initialize array of contiguous data. We need to make sure each section is
+ // contiguous so that the PGO library can compute deltas properly
+ SmallVector<uint8_t> ContiguousData(NamesData.size() + DataSize + CountsSize);
+
+ // Compute region pointers
+ DataBegin = (__llvm_profile_data *)(ContiguousData.data() + CountsSize);
+ DataEnd =
+ (__llvm_profile_data *)(ContiguousData.data() + CountsSize + DataSize);
+ CountersBegin = (char *)ContiguousData.data();
+ CountersEnd = (char *)(ContiguousData.data() + CountsSize);
+ NamesBegin = (char *)(ContiguousData.data() + CountsSize + DataSize);
+ NamesEnd = (char *)(ContiguousData.data() + CountsSize + DataSize +
+ NamesData.size());
+
+ // Copy data to contiguous buffer
+ memcpy(DataBegin, Data.data(), DataSize);
+ memcpy(CountersBegin, Counts.data(), CountsSize);
+ memcpy(NamesBegin, NamesData.data(), NamesData.size());
+
+ // Invoke compiler-rt entrypoint
+ int result = __llvm_write_custom_profile(TargetTriple.str().c_str(),
+ DataBegin, DataEnd, CountersBegin,
+ CountersEnd, NamesBegin, NamesEnd);
+ if (result != 0)
+ return Plugin::error("Error writing GPU PGO data to file");
+
+ return Plugin::success();
+}
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 1ea93795ce8ce..d5e6b6128152d 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -837,8 +837,10 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
if (!ProfOrErr)
return ProfOrErr.takeError();
- // TODO: write data to profiling file
- ProfOrErr->dump();
+ // Write data to profiling file
+ if (auto Err = ProfOrErr->write()) {
+ consumeError(std::move(Err));
+ }
}
// Delete the memory manager before deinitializing the device. Otherwise,
>From b8c916305acf08c0bd2d51b81875be5e8fc59ff3 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 13 Mar 2024 20:05:32 -0500
Subject: [PATCH 21/44] Fix tests
---
.../plugins-nextgen/common/src/PluginInterface.cpp | 3 +++
openmp/libomptarget/test/offloading/pgo1.c | 8 ++------
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index d5e6b6128152d..2359ad28a25b0 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -837,6 +837,9 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
if (!ProfOrErr)
return ProfOrErr.takeError();
+ // Dump out profdata
+ ProfOrErr->dump();
+
// Write data to profiling file
if (auto Err = ProfOrErr->write()) {
consumeError(std::move(Err));
diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c
index d95793b508dcf..79e93d0f10827 100644
--- a/openmp/libomptarget/test/offloading/pgo1.c
+++ b/openmp/libomptarget/test/offloading/pgo1.c
@@ -32,9 +32,7 @@ int main() {
}
// CLANG-PGO: ======== Counters =========
-// CLANG-PGO-NEXT: [ 0 11 20 ]
-// CLANG-PGO-NEXT: [ 10 ]
-// CLANG-PGO-NEXT: [ 20 ]
+// CLANG-PGO-NEXT: 0 11 20 10 20
// CLANG-PGO-NEXT: ========== Data ===========
// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
@@ -55,9 +53,7 @@ int main() {
// CLANG-PGO-NEXT: test2
// LLVM-PGO: ======== Counters =========
-// LLVM-PGO-NEXT: [ 20 ]
-// LLVM-PGO-NEXT: [ 10 ]
-// LLVM-PGO-NEXT: [ 20 10 1 1 ]
+// LLVM-PGO-NEXT: 20 10 20 10 1 1
// LLVM-PGO-NEXT: ========== Data ===========
// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
>From 4568c4244d11010aacf9f1fe20bb1197008b057f Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 13 Mar 2024 21:40:20 -0500
Subject: [PATCH 22/44] Fix arguments
---
compiler-rt/lib/profile/InstrProfilingFile.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index 466bfe480543b..6570bc0d74caa 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1360,10 +1360,10 @@ int __llvm_write_custom_profile(const char *Target,
initFileWriter(&fileWriter, OutputFile);
/* Write custom data to the file */
- ReturnValue = lprofWriteDataImpl(
- &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
- lprofGetVPDataReader(), NamesBegin, NamesEnd, MergeDone);
-
+ ReturnValue = lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd,
+ CountersBegin, CountersEnd, NULL, NULL,
+ lprofGetVPDataReader(), NamesBegin, NamesEnd,
+ NULL, NULL, NULL, NULL, MergeDone);
closeFileObject(OutputFile);
// Restore SIGKILL.
>From 1fc4cb9c01f251432f4a6748e69b1d8cf74cc4fb Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Mon, 18 Mar 2024 20:05:52 -0500
Subject: [PATCH 23/44] Add GPU prof flags
---
clang/include/clang/Driver/Options.td | 6 +++
clang/lib/Driver/ToolChain.cpp | 69 +++++++++++++--------------
clang/lib/Driver/ToolChains/Clang.cpp | 39 +++++++++++++--
3 files changed, 74 insertions(+), 40 deletions(-)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 4b1fcf1db1ad0..aab445906fa34 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1716,6 +1716,9 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
+def fprofile_instr_generate_gpu : Flag<["-"], "fprofile-instr-generate-gpu">,
+ Group<f_Group>, Visibility<[ClangOption, CLOption]>,
+ HelpText<"Generate instrumented GPU device code to collect execution counts into GPU_TARGET.default.profraw (overridden by LLVM_PROFILE_FILE env var)">;
def fprofile_instr_generate_EQ : Joined<["-"], "fprofile-instr-generate=">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<file>">,
HelpText<"Generate instrumented code to collect execution counts into <file> (overridden by LLVM_PROFILE_FILE env var)">;
@@ -1744,6 +1747,9 @@ defm mcdc_coverage : BoolFOption<"coverage-mcdc",
def fprofile_generate : Flag<["-"], "fprofile-generate">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
HelpText<"Generate instrumented code to collect execution counts into default.profraw (overridden by LLVM_PROFILE_FILE env var)">;
+def fprofile_generate_gpu : Flag<["-"], "fprofile-generate-gpu">,
+ Group<f_Group>, Visibility<[ClangOption, CLOption]>,
+ HelpText<"Generate instrumented GPU device code to collect execution counts into GPU_TARGET.default.profraw (overridden by LLVM_PROFILE_FILE env var)">;
def fprofile_generate_EQ : Joined<["-"], "fprofile-generate=">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
MetaVarName<"<directory>">,
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 03450fc0f57b9..f4c8aafbbd0e6 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -184,10 +184,9 @@ bool ToolChain::defaultToIEEELongDouble() const {
return PPC_LINUX_DEFAULT_IEEELONGDOUBLE && getTriple().isOSLinux();
}
-static void getAArch64MultilibFlags(const Driver &D,
- const llvm::Triple &Triple,
- const llvm::opt::ArgList &Args,
- Multilib::flags_list &Result) {
+static void getAArch64MultilibFlags(const Driver &D, const llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args,
+ Multilib::flags_list &Result) {
std::vector<StringRef> Features;
tools::aarch64::getAArch64TargetFeatures(D, Triple, Args, Features, false);
const auto UnifiedFeatures = tools::unifyTargetFeatures(Features);
@@ -209,10 +208,9 @@ static void getAArch64MultilibFlags(const Driver &D,
Result.push_back(llvm::join(MArch, "+"));
}
-static void getARMMultilibFlags(const Driver &D,
- const llvm::Triple &Triple,
- const llvm::opt::ArgList &Args,
- Multilib::flags_list &Result) {
+static void getARMMultilibFlags(const Driver &D, const llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args,
+ Multilib::flags_list &Result) {
std::vector<StringRef> Features;
llvm::ARM::FPUKind FPUKind = tools::arm::getARMTargetFeatures(
D, Triple, Args, Features, false /*ForAs*/, true /*ForMultilib*/);
@@ -303,7 +301,7 @@ ToolChain::getSanitizerArgs(const llvm::opt::ArgList &JobArgs) const {
return SanArgs;
}
-const XRayArgs& ToolChain::getXRayArgs() const {
+const XRayArgs &ToolChain::getXRayArgs() const {
if (!XRayArguments)
XRayArguments.reset(new XRayArgs(*this, Args));
return *XRayArguments;
@@ -394,8 +392,7 @@ static const DriverSuffix *parseDriverSuffix(StringRef ProgName, size_t &Pos) {
return DS;
}
-ParsedClangName
-ToolChain::getTargetAndModeFromProgramName(StringRef PN) {
+ParsedClangName ToolChain::getTargetAndModeFromProgramName(StringRef PN) {
std::string ProgName = normalizeProgramName(PN);
size_t SuffixPos;
const DriverSuffix *DS = parseDriverSuffix(ProgName, SuffixPos);
@@ -406,8 +403,8 @@ ToolChain::getTargetAndModeFromProgramName(StringRef PN) {
size_t LastComponent = ProgName.rfind('-', SuffixPos);
if (LastComponent == std::string::npos)
return ParsedClangName(ProgName.substr(0, SuffixEnd), DS->ModeFlag);
- std::string ModeSuffix = ProgName.substr(LastComponent + 1,
- SuffixEnd - LastComponent - 1);
+ std::string ModeSuffix =
+ ProgName.substr(LastComponent + 1, SuffixEnd - LastComponent - 1);
// Infer target from the prefix.
StringRef Prefix(ProgName);
@@ -465,9 +462,7 @@ Tool *ToolChain::getFlang() const {
return Flang.get();
}
-Tool *ToolChain::buildAssembler() const {
- return new tools::ClangAs(*this);
-}
+Tool *ToolChain::buildAssembler() const { return new tools::ClangAs(*this); }
Tool *ToolChain::buildLinker() const {
llvm_unreachable("Linking is not supported by this toolchain");
@@ -826,10 +821,12 @@ bool ToolChain::needsProfileRT(const ArgList &Args) {
return false;
return Args.hasArg(options::OPT_fprofile_generate) ||
+ Args.hasArg(options::OPT_fprofile_generate_gpu) ||
Args.hasArg(options::OPT_fprofile_generate_EQ) ||
Args.hasArg(options::OPT_fcs_profile_generate) ||
Args.hasArg(options::OPT_fcs_profile_generate_EQ) ||
Args.hasArg(options::OPT_fprofile_instr_generate) ||
+ Args.hasArg(options::OPT_fprofile_instr_generate_gpu) ||
Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
Args.hasArg(options::OPT_fcreate_profile) ||
Args.hasArg(options::OPT_forder_file_instrumentation);
@@ -842,8 +839,10 @@ bool ToolChain::needsGCovInstrumentation(const llvm::opt::ArgList &Args) {
}
Tool *ToolChain::SelectTool(const JobAction &JA) const {
- if (D.IsFlangMode() && getDriver().ShouldUseFlangCompiler(JA)) return getFlang();
- if (getDriver().ShouldUseClangCompiler(JA)) return getClang();
+ if (D.IsFlangMode() && getDriver().ShouldUseFlangCompiler(JA))
+ return getFlang();
+ if (getDriver().ShouldUseClangCompiler(JA))
+ return getClang();
Action::ActionClass AC = JA.getKind();
if (AC == Action::AssembleJobClass && useIntegratedAs() &&
!getTriple().isOSAIX())
@@ -865,7 +864,7 @@ std::string ToolChain::GetLinkerPath(bool *LinkerIsLLD) const {
// Get -fuse-ld= first to prevent -Wunused-command-line-argument. -fuse-ld= is
// considered as the linker flavor, e.g. "bfd", "gold", or "lld".
- const Arg* A = Args.getLastArg(options::OPT_fuse_ld_EQ);
+ const Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ);
StringRef UseLinker = A ? A->getValue() : CLANG_DEFAULT_LINKER;
// --ld-path= takes precedence over -fuse-ld= and specifies the executable
@@ -950,9 +949,7 @@ types::ID ToolChain::LookupTypeForExtension(StringRef Ext) const {
return id;
}
-bool ToolChain::HasNativeLLVMSupport() const {
- return false;
-}
+bool ToolChain::HasNativeLLVMSupport() const { return false; }
bool ToolChain::isCrossCompiling() const {
llvm::Triple HostTriple(LLVM_HOST_TRIPLE);
@@ -964,7 +961,8 @@ bool ToolChain::isCrossCompiling() const {
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
return getArch() != llvm::Triple::arm && getArch() != llvm::Triple::thumb &&
- getArch() != llvm::Triple::armeb && getArch() != llvm::Triple::thumbeb;
+ getArch() != llvm::Triple::armeb &&
+ getArch() != llvm::Triple::thumbeb;
default:
return HostTriple.getArch() != getArch();
}
@@ -1046,9 +1044,7 @@ std::string ToolChain::ComputeEffectiveClangTriple(const ArgList &Args,
return ComputeLLVMTriple(Args, InputType);
}
-std::string ToolChain::computeSysRoot() const {
- return D.SysRoot;
-}
+std::string ToolChain::computeSysRoot() const { return D.SysRoot; }
void ToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
@@ -1072,12 +1068,12 @@ void ToolChain::addProfileRTLibs(const llvm::opt::ArgList &Args,
CmdArgs.push_back(getCompilerRTArgString(Args, "profile"));
}
-ToolChain::RuntimeLibType ToolChain::GetRuntimeLibType(
- const ArgList &Args) const {
+ToolChain::RuntimeLibType
+ToolChain::GetRuntimeLibType(const ArgList &Args) const {
if (runtimeLibType)
return *runtimeLibType;
- const Arg* A = Args.getLastArg(options::OPT_rtlib_EQ);
+ const Arg *A = Args.getLastArg(options::OPT_rtlib_EQ);
StringRef LibName = A ? A->getValue() : CLANG_DEFAULT_RTLIB;
// Only use "platform" in tests to override CLANG_DEFAULT_RTLIB!
@@ -1098,8 +1094,8 @@ ToolChain::RuntimeLibType ToolChain::GetRuntimeLibType(
return *runtimeLibType;
}
-ToolChain::UnwindLibType ToolChain::GetUnwindLibType(
- const ArgList &Args) const {
+ToolChain::UnwindLibType
+ToolChain::GetUnwindLibType(const ArgList &Args) const {
if (unwindLibType)
return *unwindLibType;
@@ -1134,7 +1130,8 @@ ToolChain::UnwindLibType ToolChain::GetUnwindLibType(
return *unwindLibType;
}
-ToolChain::CXXStdlibType ToolChain::GetCXXStdlibType(const ArgList &Args) const{
+ToolChain::CXXStdlibType
+ToolChain::GetCXXStdlibType(const ArgList &Args) const {
if (cxxStdlibType)
return *cxxStdlibType;
@@ -1290,7 +1287,7 @@ void ToolChain::AddCXXStdlibLibArgs(const ArgList &Args,
void ToolChain::AddFilePathLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
for (const auto &LibPath : getFilePaths())
- if(LibPath.length() > 0)
+ if (LibPath.length() > 0)
CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath));
}
@@ -1306,9 +1303,9 @@ bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args,
if (!isOptimizationLevelFast(Args)) {
// Check if -ffast-math or -funsafe-math.
Arg *A =
- Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math,
- options::OPT_funsafe_math_optimizations,
- options::OPT_fno_unsafe_math_optimizations);
+ Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math,
+ options::OPT_funsafe_math_optimizations,
+ options::OPT_fno_unsafe_math_optimizations);
if (!A || A->getOption().getID() == options::OPT_fno_fast_math ||
A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations)
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 055884d275ce1..106a612135f93 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -578,6 +578,35 @@ static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
}
}
+static void addPGOFlagsGPU(const ToolChain &TC, const ArgList &Args,
+ ArgStringList &CmdArgs) {
+ const Driver &D = TC.getDriver();
+ auto *ProfileClangArg = Args.getLastArg(options::OPT_fprofile_generate_gpu,
+ options::OPT_fno_profile_generate);
+ auto *ProfileLLVMArg =
+ Args.getLastArg(options::OPT_fprofile_instr_generate_gpu,
+ options::OPT_fno_profile_generate);
+ if (ProfileClangArg &&
+ ProfileClangArg->getOption().matches(options::OPT_fno_profile_generate))
+ ProfileClangArg = nullptr;
+
+ if (ProfileLLVMArg &&
+ ProfileLLVMArg->getOption().matches(options::OPT_fno_profile_generate))
+ ProfileLLVMArg = nullptr;
+
+ if (ProfileClangArg && ProfileLLVMArg) {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << ProfileClangArg->getSpelling() << ProfileLLVMArg->getSpelling();
+ return;
+ }
+
+ if (ProfileClangArg)
+ CmdArgs.push_back("-fprofile-instrument=clang");
+
+ if (ProfileLLVMArg)
+ CmdArgs.push_back("-fprofile-instrument=llvm");
+}
+
static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
const JobAction &JA, const InputInfo &Output,
const ArgList &Args, SanitizerArgs &SanArgs,
@@ -6049,10 +6078,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_finstrument_functions_after_inlining,
options::OPT_finstrument_function_entry_bare);
- // NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
- // for sampling, overhead of call arc collection is way too high and there's
- // no way to collect the output.
- if (!Triple.isNVPTX() && !Triple.isAMDGCN())
+ // NVPTX/AMDGCN PGO is handled separately
+ // GPU targets don't have their own profiling libraries and are
+ // collected/handled by the host's profiling library
+ if (Triple.isNVPTX() || Triple.isAMDGCN())
+ addPGOFlagsGPU(TC, Args, CmdArgs);
+ else
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
>From 849b244ea29ac15cae7ddaa973356cecfb0e4792 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Mon, 18 Mar 2024 21:31:32 -0500
Subject: [PATCH 24/44] Fix elf obj file
---
.../plugins-nextgen/common/src/GlobalHandler.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index 88e4bee506ba8..bca66cff6558a 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -177,16 +177,19 @@ Expected<GPUProfGlobals>
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
GPUProfGlobals DeviceProfileData;
- auto ELFObj = getELFObjectFile(Image);
- if (!ELFObj)
- return ELFObj.takeError();
+ auto ObjFile = getELFObjectFile(Image);
+ if (!ObjFile)
+ return ObjFile.takeError();
+
+ std::unique_ptr<ELFObjectFileBase> ELFObj(
+ static_cast<ELFObjectFileBase *>(ObjFile->release()));
DeviceProfileData.TargetTriple = ELFObj->makeTriple();
// Iterate through elf symbols
for (auto &Sym : ELFObj->symbols()) {
auto NameOrErr = Sym.getName();
if (!NameOrErr)
- return ELFObj.takeError();
+ return NameOrErr.takeError();
// Check if given current global is a profiling global based
// on name
>From 55bd8d21a6224e0872002b0d1d77361eb75a3419 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 19 Mar 2024 14:50:54 -0500
Subject: [PATCH 25/44] Add GPU use profile option
---
clang/include/clang/Driver/Options.td | 5 +++++
clang/lib/Driver/ToolChains/Clang.cpp | 22 ++++++++++++++++++++++
2 files changed, 27 insertions(+)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index aab445906fa34..b317d4e85b957 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1768,6 +1768,11 @@ def fprofile_use_EQ : Joined<["-"], "fprofile-use=">,
Visibility<[ClangOption, CLOption]>,
MetaVarName<"<pathname>">,
HelpText<"Use instrumentation data for profile-guided optimization. If pathname is a directory, it reads from <pathname>/default.profdata. Otherwise, it reads from file <pathname>.">;
+def fprofile_use_gpu_EQ : Joined<["-"], "fprofile-use-gpu=">,
+ Group<f_Group>,
+ Visibility<[ClangOption, CLOption]>,
+ MetaVarName<"<pathname>">,
+ HelpText<"Use instrumentation data for profile-guided optimization targeting GPU">;
def fno_profile_instr_generate : Flag<["-"], "fno-profile-instr-generate">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
HelpText<"Disable generation of profile instrumentation.">;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 106a612135f93..1ea5501146950 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -586,6 +586,8 @@ static void addPGOFlagsGPU(const ToolChain &TC, const ArgList &Args,
auto *ProfileLLVMArg =
Args.getLastArg(options::OPT_fprofile_instr_generate_gpu,
options::OPT_fno_profile_generate);
+ auto *ProfileUseArg = Args.getLastArg(options::OPT_fprofile_use_gpu_EQ,
+ options::OPT_fno_profile_instr_use);
if (ProfileClangArg &&
ProfileClangArg->getOption().matches(options::OPT_fno_profile_generate))
ProfileClangArg = nullptr;
@@ -594,17 +596,37 @@ static void addPGOFlagsGPU(const ToolChain &TC, const ArgList &Args,
ProfileLLVMArg->getOption().matches(options::OPT_fno_profile_generate))
ProfileLLVMArg = nullptr;
+ if (ProfileUseArg &&
+ ProfileUseArg->getOption().matches(options::OPT_fno_profile_generate))
+ ProfileUseArg = nullptr;
+
if (ProfileClangArg && ProfileLLVMArg) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< ProfileClangArg->getSpelling() << ProfileLLVMArg->getSpelling();
return;
}
+ if (ProfileUseArg && ProfileClangArg) {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << ProfileClangArg->getSpelling() << ProfileUseArg->getSpelling();
+ return;
+ }
+
+ if (ProfileUseArg && ProfileLLVMArg) {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << ProfileLLVMArg->getSpelling() << ProfileUseArg->getSpelling();
+ return;
+ }
+
if (ProfileClangArg)
CmdArgs.push_back("-fprofile-instrument=clang");
if (ProfileLLVMArg)
CmdArgs.push_back("-fprofile-instrument=llvm");
+
+ if (ProfileUseArg)
+ CmdArgs.push_back(Args.MakeArgString(
+ Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue()));
}
static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
>From 4ebbb45baa24b52eb0f94ebaf16b6b9eb671420a Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Mon, 6 May 2024 23:13:58 -0500
Subject: [PATCH 26/44] Add more addrspace casts for GPU targets
---
.../Transforms/Instrumentation/InstrProfiling.cpp | 11 ++++++++---
.../Instrumentation/PGOInstrumentation.cpp | 13 +++++++++----
2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index a6b1e0d488120..dd8c027c4bbf6 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -869,6 +869,8 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
llvm::InstrProfValueKind::IPVK_MemOPSize);
CallInst *Call = nullptr;
auto *TLI = &GetTLI(*Ind->getFunction());
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ DataVar, PointerType::getUnqual(M.getContext()));
// To support value profiling calls within Windows exception handlers, funclet
// information contained within operand bundles needs to be copied over to
@@ -877,11 +879,13 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
SmallVector<OperandBundleDef, 1> OpBundles;
Ind->getOperandBundlesAsDefs(OpBundles);
if (!IsMemOpSize) {
- Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
+ Value *Args[3] = {Ind->getTargetValue(), NormalizedPtr,
+ Builder.getInt32(Index)};
Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
OpBundles);
} else {
- Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
+ Value *Args[3] = {Ind->getTargetValue(), NormalizedPtr,
+ Builder.getInt32(Index)};
Call = Builder.CreateCall(
getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
Args, OpBundles);
@@ -1575,7 +1579,8 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
ValuesVar->setAlignment(Align(8));
maybeSetComdat(ValuesVar, Fn, CntsVarName);
- ValuesPtrExpr = ValuesVar;
+ ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ ValuesVar, PointerType::getUnqual(Fn->getContext()));
}
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index f662c89a378be..fbe969f4a9c16 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -955,12 +955,15 @@ static void instrumentOneFunc(
ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
assert(ToProfile && "value profiling Value is of unexpected type");
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ Name, PointerType::get(M->getContext(), 0));
+
SmallVector<OperandBundleDef, 1> OpBundles;
populateEHOperandBundle(Cand, BlockColors, OpBundles);
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
- {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
- ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
+ {NormalizedPtr, Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
+ Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
OpBundles);
}
} // IPVK_First <= Kind <= IPVK_Last
@@ -1632,10 +1635,12 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ FuncNameVar, PointerType::get(M->getContext(), 0));
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
- {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
- Builder.getInt32(*CurCtrIdx), Step});
+ {NormalizedPtr, Builder.getInt64(FuncHash),
+ Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
++(*CurCtrIdx);
}
>From 7770b37a5a4c40bd45887f762bd7f1e652bc0ed2 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 7 May 2024 16:31:48 -0500
Subject: [PATCH 27/44] Fix params
---
compiler-rt/lib/profile/InstrProfilingFile.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index 466bfe480543b..bc1d40a37a5ad 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1360,9 +1360,10 @@ int __llvm_write_custom_profile(const char *Target,
initFileWriter(&fileWriter, OutputFile);
/* Write custom data to the file */
- ReturnValue = lprofWriteDataImpl(
- &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
- lprofGetVPDataReader(), NamesBegin, NamesEnd, MergeDone);
+ ReturnValue =
+ lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
+ CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
+ NULL, NULL, NULL, NamesBegin, NamesEnd, MergeDone);
closeFileObject(OutputFile);
>From 619fb6918560f0b5d0b8137d392dfb27255a7d32 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 7 May 2024 17:20:39 -0500
Subject: [PATCH 28/44] Resolve merge conflict
---
clang/lib/Driver/ToolChain.cpp | 7 -------
1 file changed, 7 deletions(-)
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index fce4168761a6d..e29f1ccf44b69 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1313,17 +1313,10 @@ bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args,
// (to keep the linker options consistent with gcc and clang itself).
if (Default && !isOptimizationLevelFast(Args)) {
// Check if -ffast-math or -funsafe-math.
-<<<<<<< HEAD
- Arg *A =
- Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math,
- options::OPT_funsafe_math_optimizations,
- options::OPT_fno_unsafe_math_optimizations);
-=======
Arg *A = Args.getLastArg(
options::OPT_ffast_math, options::OPT_fno_fast_math,
options::OPT_funsafe_math_optimizations,
options::OPT_fno_unsafe_math_optimizations, options::OPT_ffp_model_EQ);
->>>>>>> main
if (!A || A->getOption().getID() == options::OPT_fno_fast_math ||
A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations)
>From 3f08ae9d560dbaeba4c547186c85a8c34f3dee97 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Sat, 11 May 2024 02:31:28 -0500
Subject: [PATCH 29/44] Have test read from profraw instead of dump
---
offload/test/lit.cfg | 2 +-
offload/test/offloading/pgo1.c | 90 +++++++++++++++++-----------------
2 files changed, 45 insertions(+), 47 deletions(-)
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 069110dc69a6e..94a0bc8a2b43f 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -390,7 +390,7 @@ config.substitutions.append(("%clang", config.test_c_compiler))
if config.test_fortran_compiler:
config.available_features.add('flang')
config.substitutions.append(("%flang", config.test_fortran_compiler))
-
+config.substitutions.append(("%target_triple", config.libomptarget_current_target))
config.substitutions.append(("%openmp_flags", config.test_openmp_flags))
if config.libomptarget_current_target.startswith('nvptx') and config.cuda_path:
config.substitutions.append(("%cuda_flags", "--cuda-path=" + config.cuda_path))
diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c
index 79e93d0f10827..9fe231e7c6716 100644
--- a/offload/test/offloading/pgo1.c
+++ b/offload/test/offloading/pgo1.c
@@ -1,12 +1,15 @@
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
-// RUN: -Xclang "-fprofile-instrument=clang"
-// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
-// RUN: --check-prefix="CLANG-PGO"
-// RUN: %libomptarget-compile-generic -fprofile-generate \
-// RUN: -Xclang "-fprofile-instrument=llvm"
-// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate-gpu
+// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.llvm.profraw | %fcheck-generic \
// RUN: --check-prefix="LLVM-PGO"
+// RUN: %libomptarget-compile-generic -fprofile-generate-gpu
+// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.clang.profraw | %fcheck-generic \
+// RUN: --check-prefix="CLANG-PGO"
+
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
// UNSUPPORTED: aarch64-unknown-linux-gnu
@@ -31,43 +34,38 @@ int main() {
}
}
-// CLANG-PGO: ======== Counters =========
-// CLANG-PGO-NEXT: 0 11 20 10 20
-// CLANG-PGO-NEXT: ========== Data ===========
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: ======== Functions ========
-// CLANG-PGO-NEXT: pgo1.c:
-// CLANG-PGO-SAME: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
-// CLANG-PGO-NEXT: test1
-// CLANG-PGO-NEXT: test2
+// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 4
+// LLVM-PGO: Function count: 20
+// LLVM-PGO: Block counts: [10, 20, 10]
+
+// LLVM-PGO-LABEL: test1:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Function count: 1
+// LLVM-PGO: Block counts: []
+
+// LLVM-PGO-LABEL: test2:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Function count: 1
+// LLVM-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 3
+// CLANG-PGO: Function count: 0
+// CLANG-PGO: Block counts: [11, 20]
+
+// CLANG-PGO-LABEL: test1:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 10
+// CLANG-PGO: Block counts: []
-// LLVM-PGO: ======== Counters =========
-// LLVM-PGO-NEXT: 20 10 20 10 1 1
-// LLVM-PGO-NEXT: ========== Data ===========
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: ======== Functions ========
-// LLVM-PGO-NEXT: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
-// LLVM-PGO-NEXT: test1
-// LLVM-PGO-NEXT: test2
+// CLANG-PGO-LABEL: test2:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 20
+// CLANG-PGO: Block counts: []
>From 09f2b39beaa9e325655d5569e5107827c1e7e955 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Sat, 11 May 2024 02:39:16 -0500
Subject: [PATCH 30/44] Remove debug dump
---
offload/plugins-nextgen/common/src/PluginInterface.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index a410deed1654f..e7559f9e6cec8 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -840,9 +840,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
if (!ProfOrErr)
return ProfOrErr.takeError();
- // Dump out profdata
- ProfOrErr->dump();
-
// Write data to profiling file
if (auto Err = ProfOrErr->write()) {
consumeError(std::move(Err));
>From aa895a1788969a0d27692057a1457074e9772c78 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Mon, 18 Mar 2024 21:31:32 -0500
Subject: [PATCH 31/44] Fix elf obj file
---
offload/plugins-nextgen/common/src/GlobalHandler.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
index 80cdcaff75528..7717e19a5b677 100644
--- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -177,16 +177,19 @@ Expected<GPUProfGlobals>
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
GPUProfGlobals DeviceProfileData;
- auto ELFObj = getELFObjectFile(Image);
- if (!ELFObj)
- return ELFObj.takeError();
+ auto ObjFile = getELFObjectFile(Image);
+ if (!ObjFile)
+ return ObjFile.takeError();
+
+ std::unique_ptr<ELFObjectFileBase> ELFObj(
+ static_cast<ELFObjectFileBase *>(ObjFile->release()));
DeviceProfileData.TargetTriple = ELFObj->makeTriple();
// Iterate through elf symbols
for (auto &Sym : ELFObj->symbols()) {
auto NameOrErr = Sym.getName();
if (!NameOrErr)
- return ELFObj.takeError();
+ return NameOrErr.takeError();
// Check if given current global is a profiling global based
// on name
>From 2031e49c2b26864f2dab72e629eb6cbe34928a7a Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Mon, 6 May 2024 23:13:58 -0500
Subject: [PATCH 32/44] Add more addrspace casts for GPU targets
---
.../Transforms/Instrumentation/InstrProfiling.cpp | 11 ++++++++---
.../Instrumentation/PGOInstrumentation.cpp | 13 +++++++++----
2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index a6b1e0d488120..dd8c027c4bbf6 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -869,6 +869,8 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
llvm::InstrProfValueKind::IPVK_MemOPSize);
CallInst *Call = nullptr;
auto *TLI = &GetTLI(*Ind->getFunction());
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ DataVar, PointerType::getUnqual(M.getContext()));
// To support value profiling calls within Windows exception handlers, funclet
// information contained within operand bundles needs to be copied over to
@@ -877,11 +879,13 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
SmallVector<OperandBundleDef, 1> OpBundles;
Ind->getOperandBundlesAsDefs(OpBundles);
if (!IsMemOpSize) {
- Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
+ Value *Args[3] = {Ind->getTargetValue(), NormalizedPtr,
+ Builder.getInt32(Index)};
Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
OpBundles);
} else {
- Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
+ Value *Args[3] = {Ind->getTargetValue(), NormalizedPtr,
+ Builder.getInt32(Index)};
Call = Builder.CreateCall(
getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
Args, OpBundles);
@@ -1575,7 +1579,8 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
ValuesVar->setAlignment(Align(8));
maybeSetComdat(ValuesVar, Fn, CntsVarName);
- ValuesPtrExpr = ValuesVar;
+ ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ ValuesVar, PointerType::getUnqual(Fn->getContext()));
}
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 4b51396a8baa3..ee1657ba8400e 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1007,12 +1007,15 @@ static void instrumentOneFunc(
ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
assert(ToProfile && "value profiling Value is of unexpected type");
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ Name, PointerType::get(M->getContext(), 0));
+
SmallVector<OperandBundleDef, 1> OpBundles;
populateEHOperandBundle(Cand, BlockColors, OpBundles);
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
- {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
- ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
+ {NormalizedPtr, Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
+ Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
OpBundles);
}
} // IPVK_First <= Kind <= IPVK_Last
@@ -1685,10 +1688,12 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ FuncNameVar, PointerType::get(M->getContext(), 0));
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
- {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
- Builder.getInt32(*CurCtrIdx), Step});
+ {NormalizedPtr, Builder.getInt64(FuncHash),
+ Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
++(*CurCtrIdx);
}
>From be6524bb4f77de0add1e698f68115fd336f32238 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Mon, 13 May 2024 17:41:00 -0500
Subject: [PATCH 33/44] Have test read from profraw instead of dump
---
offload/test/lit.cfg | 2 +
offload/test/offloading/pgo1.c | 94 ++++++++++++++++------------------
2 files changed, 46 insertions(+), 50 deletions(-)
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 069110dc69a6e..38e6a33b01faf 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -391,6 +391,8 @@ if config.test_fortran_compiler:
config.available_features.add('flang')
config.substitutions.append(("%flang", config.test_fortran_compiler))
+config.substitutions.append(("%target_triple", config.libomptarget_current_target))
+
config.substitutions.append(("%openmp_flags", config.test_openmp_flags))
if config.libomptarget_current_target.startswith('nvptx') and config.cuda_path:
config.substitutions.append(("%cuda_flags", "--cuda-path=" + config.cuda_path))
diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c
index 79e93d0f10827..d22d5340f5b3e 100644
--- a/offload/test/offloading/pgo1.c
+++ b/offload/test/offloading/pgo1.c
@@ -1,22 +1,21 @@
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
-// RUN: -Xclang "-fprofile-instrument=clang"
-// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
-// RUN: --check-prefix="CLANG-PGO"
-// RUN: %libomptarget-compile-generic -fprofile-generate \
-// RUN: -Xclang "-fprofile-instrument=llvm"
-// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
+// RUN: %libomptarget-compile-generic -Xclang "-fprofile-instrument=llvm"
+// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.llvm.profraw | %fcheck-generic \
// RUN: --check-prefix="LLVM-PGO"
+// RUN: %libomptarget-compile-generic -Xclang "-fprofile-instrument=clang"
+// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.clang.profraw | %fcheck-generic \
+// RUN: --check-prefix="CLANG-PGO"
+
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
// REQUIRES: pgo
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
int test1(int a) { return a / 2; }
int test2(int a) { return a * 2; }
@@ -31,43 +30,38 @@ int main() {
}
}
-// CLANG-PGO: ======== Counters =========
-// CLANG-PGO-NEXT: 0 11 20 10 20
-// CLANG-PGO-NEXT: ========== Data ===========
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: ======== Functions ========
-// CLANG-PGO-NEXT: pgo1.c:
-// CLANG-PGO-SAME: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
-// CLANG-PGO-NEXT: test1
-// CLANG-PGO-NEXT: test2
+// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 4
+// LLVM-PGO: Function count: 20
+// LLVM-PGO: Block counts: [10, 20, 10]
+
+// LLVM-PGO-LABEL: test1:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Function count: 1
+// LLVM-PGO: Block counts: []
+
+// LLVM-PGO-LABEL: test2:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Function count: 1
+// LLVM-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 3
+// CLANG-PGO: Function count: 0
+// CLANG-PGO: Block counts: [11, 20]
+
+// CLANG-PGO-LABEL: test1:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 10
+// CLANG-PGO: Block counts: []
-// LLVM-PGO: ======== Counters =========
-// LLVM-PGO-NEXT: 20 10 20 10 1 1
-// LLVM-PGO-NEXT: ========== Data ===========
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: ======== Functions ========
-// LLVM-PGO-NEXT: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
-// LLVM-PGO-NEXT: test1
-// LLVM-PGO-NEXT: test2
+// CLANG-PGO-LABEL: test2:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 20
+// CLANG-PGO: Block counts: []
>From e266cc7190b4639c8273d49d39e78aa644bf032b Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 16 May 2024 23:21:20 -0500
Subject: [PATCH 34/44] Fix GPU PGO names
---
clang/lib/Driver/ToolChains/Clang.cpp | 25 ++++++++++++++++++++++---
offload/test/offloading/pgo1.c | 4 ++--
2 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index f0c2481145d0b..5a4dc1295360f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -585,13 +585,20 @@ static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
static void addPGOFlagsGPU(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
const Driver &D = TC.getDriver();
- auto *ProfileClangArg = Args.getLastArg(options::OPT_fprofile_generate_gpu,
- options::OPT_fno_profile_generate);
- auto *ProfileLLVMArg =
+ auto *ProfileClangArg =
Args.getLastArg(options::OPT_fprofile_instr_generate_gpu,
options::OPT_fno_profile_generate);
+ auto *ProfileLLVMArg = Args.getLastArg(options::OPT_fprofile_generate_gpu,
+ options::OPT_fno_profile_generate);
auto *ProfileUseArg = Args.getLastArg(options::OPT_fprofile_use_gpu_EQ,
options::OPT_fno_profile_instr_use);
+
+ auto *HostLLVMArg = Args.getLastArgNoClaim(options::OPT_fprofile_generate,
+ options::OPT_fprofile_generate_EQ);
+ auto *HostClangArg =
+ Args.getLastArgNoClaim(options::OPT_fprofile_instr_generate,
+ options::OPT_fprofile_instr_generate_EQ);
+
if (ProfileClangArg &&
ProfileClangArg->getOption().matches(options::OPT_fno_profile_generate))
ProfileClangArg = nullptr;
@@ -622,6 +629,18 @@ static void addPGOFlagsGPU(const ToolChain &TC, const ArgList &Args,
return;
}
+ if (HostLLVMArg && ProfileClangArg) {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << HostLLVMArg->getSpelling() << ProfileClangArg->getSpelling();
+ return;
+ }
+
+ if (HostClangArg && ProfileLLVMArg) {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << HostClangArg->getSpelling() << ProfileLLVMArg->getSpelling();
+ return;
+ }
+
if (ProfileClangArg)
CmdArgs.push_back("-fprofile-instrument=clang");
diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c
index ec93cce2c8620..b9fc95c89791a 100644
--- a/offload/test/offloading/pgo1.c
+++ b/offload/test/offloading/pgo1.c
@@ -1,10 +1,10 @@
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate-gpu
+// RUN: %libomptarget-compile-generic -fprofile-generate-gpu
// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
// RUN: %target_triple.llvm.profraw | %fcheck-generic \
// RUN: --check-prefix="LLVM-PGO"
-// RUN: %libomptarget-compile-generic -fprofile-generate-gpu
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate-gpu
// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
// RUN: %target_triple.clang.profraw | %fcheck-generic \
>From 2b8eb2935ec21bf0acc5c56f45837b5976560963 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 24 May 2024 19:59:33 -0500
Subject: [PATCH 35/44] Fix PGO test format
---
offload/test/offloading/pgo1.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c
index d22d5340f5b3e..0e75c684ed926 100644
--- a/offload/test/offloading/pgo1.c
+++ b/offload/test/offloading/pgo1.c
@@ -33,20 +33,17 @@ int main() {
// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 4
-// LLVM-PGO: Function count: 20
-// LLVM-PGO: Block counts: [10, 20, 10]
+// LLVM-PGO: Block counts: [20, 10, 20, 10]
// LLVM-PGO-LABEL: test1:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 1
-// LLVM-PGO: Function count: 1
-// LLVM-PGO: Block counts: []
+// LLVM-PGO: Block counts: [1]
// LLVM-PGO-LABEL: test2:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 1
-// LLVM-PGO: Function count: 1
-// LLVM-PGO: Block counts: []
+// LLVM-PGO: Block counts: [1]
// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
>From 67f3009173d815295f36e2b37e85add1347e3bf9 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 24 May 2024 20:45:04 -0500
Subject: [PATCH 36/44] Refactor profile writer
---
compiler-rt/lib/profile/InstrProfilingFile.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index bc1d40a37a5ad..76238214c13aa 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1344,8 +1344,7 @@ int __llvm_write_custom_profile(const char *Target,
forceTruncateFile(TargetFilename);
/* Open target-specific PGO file */
- MergeDone = 0;
- FILE *OutputFile = getMergeFileObject(TargetFilename, &MergeDone);
+ FILE *OutputFile = getFileObject(TargetFilename);
if (!OutputFile) {
PROF_ERR("Failed to open file : %s\n", TargetFilename);
@@ -1356,15 +1355,11 @@ int __llvm_write_custom_profile(const char *Target,
FreeHook = &free;
setupIOBuffer();
- ProfDataWriter fileWriter;
- initFileWriter(&fileWriter, OutputFile);
-
- /* Write custom data to the file */
- ReturnValue =
- lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
- CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
- NULL, NULL, NULL, NamesBegin, NamesEnd, MergeDone);
+ /* Write custom data */
+ ReturnValue = __llvm_profile_write_buffer_internal(
+ OutputFile, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
+ NamesBegin, NamesEnd);
closeFileObject(OutputFile);
// Restore SIGKILL.
>From e8ad1322c557f7b48e2b28fe3a34a696a1103bba Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Mon, 27 May 2024 18:29:18 -0500
Subject: [PATCH 37/44] Fix refactor bug
---
compiler-rt/lib/profile/InstrProfilingFile.c | 52 ++++++++++----------
offload/test/offloading/pgo1.c | 6 ++-
2 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index 76238214c13aa..784cb9af6169d 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -505,14 +505,6 @@ static FILE *getFileObject(const char *OutputName) {
return fopen(OutputName, "ab");
}
-/* Get file object and merge if applicable */
-static FILE *getMergeFileObject(const char *OutputName, int *MergeDone) {
- VPMergeHook = &lprofMergeValueProfData;
- if (doMerging())
- return openFileForMerging(OutputName, MergeDone);
- return getFileObject(OutputName);
-}
-
static void closeFileObject(FILE *OutputFile) {
if (OutputFile == getProfileFile()) {
fflush(OutputFile);
@@ -526,8 +518,15 @@ static void closeFileObject(FILE *OutputFile) {
/* Write profile data to file \c OutputName. */
static int writeFile(const char *OutputName) {
- int RetVal, MergeDone = 0;
- FILE *OutputFile = getMergeFileObject(OutputName, &MergeDone);
+ int RetVal;
+ FILE *OutputFile;
+
+ int MergeDone = 0;
+ VPMergeHook = &lprofMergeValueProfData;
+ if (doMerging())
+ OutputFile = openFileForMerging(OutputName, &MergeDone);
+ else
+ OutputFile = getFileObject(OutputName);
if (!OutputFile)
return -1;
@@ -565,16 +564,10 @@ static int writeOrderFile(const char *OutputName) {
#define LPROF_INIT_ONCE_ENV "__LLVM_PROFILE_RT_INIT_ONCE"
-static void forceTruncateFile(const char *Filename) {
- FILE *File = fopen(Filename, "w");
- if (!File)
- return;
- fclose(File);
-}
-
static void truncateCurrentFile(void) {
const char *Filename;
char *FilenameBuf;
+ FILE *File;
int Length;
Length = getCurFilenameLength();
@@ -604,7 +597,10 @@ static void truncateCurrentFile(void) {
return;
/* Truncate the file. Later we'll reopen and append. */
- forceTruncateFile(Filename);
+ File = fopen(Filename, "w");
+ if (!File)
+ return;
+ fclose(File);
}
/* Write a partial profile to \p Filename, which is required to be backed by
@@ -1287,7 +1283,7 @@ int __llvm_write_custom_profile(const char *Target,
const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin,
const char *NamesEnd) {
- int ReturnValue = 0, FilenameLength, TargetLength, MergeDone;
+ int ReturnValue = 0, FilenameLength, TargetLength;
char *FilenameBuf, *TargetFilename;
const char *Filename;
@@ -1340,11 +1336,9 @@ int __llvm_write_custom_profile(const char *Target,
return -1;
}
- /* Clean old target file */
- forceTruncateFile(TargetFilename);
-
- /* Open target-specific PGO file */
- FILE *OutputFile = getFileObject(TargetFilename);
+ /* Open and truncate target-specific PGO file */
+ FILE *OutputFile = fopen(TargetFilename, "w");
+ setProfileFile(OutputFile);
if (!OutputFile) {
PROF_ERR("Failed to open file : %s\n", TargetFilename);
@@ -1357,9 +1351,13 @@ int __llvm_write_custom_profile(const char *Target,
setupIOBuffer();
/* Write custom data */
- ReturnValue = __llvm_profile_write_buffer_internal(
- OutputFile, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
- NamesBegin, NamesEnd);
+ ProfDataWriter fileWriter;
+ initFileWriter(&fileWriter, OutputFile);
+
+ /* Write custom data to the file */
+ ReturnValue = lprofWriteDataImpl(
+ &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
+ lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
closeFileObject(OutputFile);
// Restore SIGKILL.
diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c
index 0e75c684ed926..d674711326580 100644
--- a/offload/test/offloading/pgo1.c
+++ b/offload/test/offloading/pgo1.c
@@ -1,10 +1,12 @@
-// RUN: %libomptarget-compile-generic -Xclang "-fprofile-instrument=llvm"
+// RUN: %libomptarget-compile-generic -fprofile-generate \
+// RUN: -Xclang "-fprofile-instrument=llvm"
// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
// RUN: %target_triple.llvm.profraw | %fcheck-generic \
// RUN: --check-prefix="LLVM-PGO"
-// RUN: %libomptarget-compile-generic -Xclang "-fprofile-instrument=clang"
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
+// RUN: -Xclang "-fprofile-instrument=clang"
// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
// RUN: %target_triple.clang.profraw | %fcheck-generic \
>From 79bf08e0bea8ab32781f201cdfc096a59156f270 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 28 May 2024 00:45:47 -0500
Subject: [PATCH 38/44] Check for level in test case
TODO: Actually ensure the right level is used in the profraw file when only GPU flags are supplied
---
offload/test/offloading/pgo1.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c
index 5d1a41ccab0f2..a3a242c703d96 100644
--- a/offload/test/offloading/pgo1.c
+++ b/offload/test/offloading/pgo1.c
@@ -45,6 +45,16 @@ int main() {
// LLVM-PGO: Counters: 1
// LLVM-PGO: Block counts: [1]
+// LLVM-PGO-LABEL: Instrumentation level:
+// LLVM-PGO-SAME: IR
+// LLVM-PGO-SAME: entry_first = 0
+// LLVM-PGO-LABEL: Functions shown:
+// LLVM-PGO-SAME: 3
+// LLVM-PGO-LABEL: Maximum function count:
+// LLVM-PGO-SAME: 20
+// LLVM-PGO-LABEL: Maximum internal block count:
+// LLVM-PGO-SAME: 20
+
// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-PGO: Counters: 3
@@ -62,3 +72,12 @@ int main() {
// CLANG-PGO: Counters: 1
// CLANG-PGO: Function count: 20
// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: Instrumentation level:
+// CLANG-PGO-SAME: Front-end
+// CLANG-PGO-LABEL: Functions shown:
+// CLANG-PGO-SAME: 3
+// CLANG-PGO-LABEL: Maximum function count:
+// CLANG-PGO-SAME: 20
+// CLANG-PGO-LABEL: Maximum internal block count:
+// CLANG-PGO-SAME: 20
>From 4c9f814ce14aeb6766a93f5c1d15b847b98dc29f Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 28 May 2024 12:58:43 -0500
Subject: [PATCH 39/44] Make requested clang-format change
---
offload/plugins-nextgen/common/include/GlobalHandler.h | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index 017d7e994f07a..1d7b9f80f9dfd 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -64,12 +64,10 @@ struct __llvm_profile_data {
};
extern "C" {
-extern int __attribute__((weak))
-__llvm_write_custom_profile(const char *Target,
- const __llvm_profile_data *DataBegin,
- const __llvm_profile_data *DataEnd,
- const char *CountersBegin, const char *CountersEnd,
- const char *NamesBegin, const char *NamesEnd);
+extern int __attribute__((weak)) __llvm_write_custom_profile(
+ const char *Target, const __llvm_profile_data *DataBegin,
+ const __llvm_profile_data *DataEnd, const char *CountersBegin,
+ const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
}
/// PGO profiling data extracted from a GPU device
>From cfe166091ca91623d356d2dde41b64cefe98e472 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 30 May 2024 18:15:55 -0500
Subject: [PATCH 40/44] Check for version global on GPU
---
compiler-rt/include/profile/InstrProfData.inc | 2 +-
compiler-rt/lib/profile/InstrProfiling.h | 3 ++-
.../lib/profile/InstrProfilingBuffer.c | 3 ++-
compiler-rt/lib/profile/InstrProfilingFile.c | 14 +++++++++----
.../lib/profile/InstrProfilingInternal.h | 3 ++-
.../lib/profile/InstrProfilingWriter.c | 20 +++++++++----------
.../llvm/ProfileData/InstrProfData.inc | 2 +-
.../Instrumentation/PGOInstrumentation.cpp | 5 ++++-
.../llvm-profdata/binary-ids-padding.test | 2 +-
...alformed-not-space-for-another-header.test | 2 +-
.../malformed-num-counters-zero.test | 2 +-
.../malformed-ptr-to-counter-array.test | 2 +-
.../common/include/GlobalHandler.h | 13 ++++++++----
.../common/src/GlobalHandler.cpp | 16 ++++++++++++---
14 files changed, 58 insertions(+), 31 deletions(-)
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index e9866d94b762c..f0a260483429c 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
#define INSTR_PROF_DATA_DEFINED
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index ef1292a45bf01..34b7d85ad6684 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -305,7 +305,8 @@ int __llvm_write_custom_profile(const char *Target,
const __llvm_profile_data *DataEnd,
const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin,
- const char *NamesEnd);
+ const char *NamesEnd,
+ const uint64_t *VersionOverride);
/*!
* This variable is defined in InstrProfilingRuntime.cpp as a hidden
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index 1c451d7ec7563..b406e8db74f3f 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -252,5 +252,6 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
&BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd,
/*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL,
- /*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0);
+ /*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0,
+ __llvm_profile_get_version());
}
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index 784cb9af6169d..947a3ff89bc1f 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1282,7 +1282,8 @@ int __llvm_write_custom_profile(const char *Target,
const __llvm_profile_data *DataEnd,
const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin,
- const char *NamesEnd) {
+ const char *NamesEnd,
+ const uint64_t *VersionOverride) {
int ReturnValue = 0, FilenameLength, TargetLength;
char *FilenameBuf, *TargetFilename;
const char *Filename;
@@ -1354,10 +1355,15 @@ int __llvm_write_custom_profile(const char *Target,
ProfDataWriter fileWriter;
initFileWriter(&fileWriter, OutputFile);
+ uint64_t Version = __llvm_profile_get_version();
+ if (VersionOverride)
+ Version = *VersionOverride;
+
/* Write custom data to the file */
- ReturnValue = lprofWriteDataImpl(
- &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
- lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
+ ReturnValue =
+ lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
+ CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
+ NULL, NULL, NULL, NamesBegin, NamesEnd, 0, Version);
closeFileObject(OutputFile);
// Restore SIGKILL.
diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h
index d5bd0e41fb129..2b9f687e7f886 100644
--- a/compiler-rt/lib/profile/InstrProfilingInternal.h
+++ b/compiler-rt/lib/profile/InstrProfilingInternal.h
@@ -160,7 +160,8 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
VPDataReaderType *VPDataReader, const char *NamesBegin,
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
- const char *VNamesEnd, int SkipNameDataWrite);
+ const char *VNamesEnd, int SkipNameDataWrite,
+ uint64_t Version);
/* Merge value profile data pointed to by SrcValueProfData into
* in-memory profile counters pointed by to DstData. */
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index 8816a71155511..bcd88b30d050d 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -254,21 +254,21 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
const char *VNamesBegin = __llvm_profile_begin_vtabnames();
const char *VNamesEnd = __llvm_profile_end_vtabnames();
+ uint64_t Version = __llvm_profile_get_version();
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
NamesBegin, NamesEnd, VTableBegin, VTableEnd,
- VNamesBegin, VNamesEnd, SkipNameDataWrite);
+ VNamesBegin, VNamesEnd, SkipNameDataWrite, Version);
}
-COMPILER_RT_VISIBILITY int
-lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
- const __llvm_profile_data *DataEnd,
- const char *CountersBegin, const char *CountersEnd,
- const char *BitmapBegin, const char *BitmapEnd,
- VPDataReaderType *VPDataReader, const char *NamesBegin,
- const char *NamesEnd, const VTableProfData *VTableBegin,
- const VTableProfData *VTableEnd, const char *VNamesBegin,
- const char *VNamesEnd, int SkipNameDataWrite) {
+COMPILER_RT_VISIBILITY int lprofWriteDataImpl(
+ ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
+ const __llvm_profile_data *DataEnd, const char *CountersBegin,
+ const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd,
+ VPDataReaderType *VPDataReader, const char *NamesBegin,
+ const char *NamesEnd, const VTableProfData *VTableBegin,
+ const VTableProfData *VTableEnd, const char *VNamesBegin,
+ const char *VNamesEnd, int SkipNameDataWrite, uint64_t Version) {
/* Calculate size of sections. */
const uint64_t DataSectionSize =
__llvm_profile_get_data_size(DataBegin, DataEnd);
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index e9866d94b762c..f0a260483429c 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
#define INSTR_PROF_DATA_DEFINED
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index ee1657ba8400e..f3c68fb17ce7c 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -408,7 +408,10 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
- IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
+ if (isGPUProfTarget(M))
+ IRLevelVersionVariable->setVisibility(GlobalValue::ProtectedVisibility);
+ else
+ IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
Triple TT(M.getTargetTriple());
if (TT.supportsCOMDAT()) {
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
diff --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
index 292c582b45c52..f31aa15bfe6c9 100644
--- a/llvm/test/tools/llvm-profdata/binary-ids-padding.test
+++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
index 705e5efaf5875..44be2980bb2f2 100644
--- a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
+++ b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
index 157c13b926a7e..9af9d65a6bdba 100644
--- a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
+++ b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
index 83cf76f68fb63..49c5ae9b0931d 100644
--- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
+++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index 1d7b9f80f9dfd..6daa8b1b85413 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -13,6 +13,7 @@
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
+#include <optional>
#include <type_traits>
#include "llvm/ADT/DenseMap.h"
@@ -64,10 +65,13 @@ struct __llvm_profile_data {
};
extern "C" {
-extern int __attribute__((weak)) __llvm_write_custom_profile(
- const char *Target, const __llvm_profile_data *DataBegin,
- const __llvm_profile_data *DataEnd, const char *CountersBegin,
- const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
+extern int __attribute__((weak))
+__llvm_write_custom_profile(const char *Target,
+ const __llvm_profile_data *DataBegin,
+ const __llvm_profile_data *DataEnd,
+ const char *CountersBegin, const char *CountersEnd,
+ const char *NamesBegin, const char *NamesEnd,
+ const uint64_t *VersionOverride);
}
/// PGO profiling data extracted from a GPU device
@@ -76,6 +80,7 @@ struct GPUProfGlobals {
SmallVector<__llvm_profile_data> Data;
SmallVector<uint8_t> NamesData;
Triple TargetTriple;
+ std::optional<uint64_t> Version;
void dump() const;
Error write() const;
diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
index bca66cff6558a..93abd0a5cea36 100644
--- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -16,6 +16,7 @@
#include "Shared/Utils.h"
+#include "llvm/ProfileData/InstrProfData.inc"
#include "llvm/Support/Error.h"
#include <cstring>
@@ -214,6 +215,13 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
return Err;
DeviceProfileData.Data.push_back(std::move(Data));
+ } else if (*NameOrErr == INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)) {
+ uint64_t RawVersionData;
+ GlobalTy RawVersionGlobal(NameOrErr->str(), Sym.getSize(),
+ &RawVersionData);
+ if (auto Err = readGlobalFromDevice(Device, Image, RawVersionGlobal))
+ return Err;
+ DeviceProfileData.Version = RawVersionData;
}
}
return DeviceProfileData;
@@ -267,6 +275,8 @@ Error GPUProfGlobals::write() const {
CountsSize = Counts.size() * sizeof(int64_t);
__llvm_profile_data *DataBegin, *DataEnd;
char *CountersBegin, *CountersEnd, *NamesBegin, *NamesEnd;
+ const uint64_t *VersionOverride =
+ Version.has_value() ? &Version.value() : nullptr;
// Initialize array of contiguous data. We need to make sure each section is
// contiguous so that the PGO library can compute deltas properly
@@ -288,9 +298,9 @@ Error GPUProfGlobals::write() const {
memcpy(NamesBegin, NamesData.data(), NamesData.size());
// Invoke compiler-rt entrypoint
- int result = __llvm_write_custom_profile(TargetTriple.str().c_str(),
- DataBegin, DataEnd, CountersBegin,
- CountersEnd, NamesBegin, NamesEnd);
+ int result = __llvm_write_custom_profile(
+ TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
+ CountersEnd, NamesBegin, NamesEnd, VersionOverride);
if (result != 0)
return Plugin::error("Error writing GPU PGO data to file");
>From 5bf437618c91c882543c97d34b468d74070218fa Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 31 May 2024 12:57:07 -0500
Subject: [PATCH 41/44] Add host/device combination test
---
offload/test/offloading/{ => gpupgo}/pgo1.c | 14 ++--
offload/test/offloading/gpupgo/pgo2.c | 73 +++++++++++++++++++++
2 files changed, 81 insertions(+), 6 deletions(-)
rename offload/test/offloading/{ => gpupgo}/pgo1.c (83%)
create mode 100644 offload/test/offloading/gpupgo/pgo2.c
diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/gpupgo/pgo1.c
similarity index 83%
rename from offload/test/offloading/pgo1.c
rename to offload/test/offloading/gpupgo/pgo1.c
index a3a242c703d96..7c6e55f654642 100644
--- a/offload/test/offloading/pgo1.c
+++ b/offload/test/offloading/gpupgo/pgo1.c
@@ -1,14 +1,16 @@
// RUN: %libomptarget-compile-generic -fprofile-generate-gpu
-// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1
+// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
+// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
-// RUN: %target_triple.llvm.profraw | %fcheck-generic \
-// RUN: --check-prefix="LLVM-PGO"
+// RUN: %target_triple.%basename_t.llvm.profraw | \
+// RUN: %fcheck-generic --check-prefix="LLVM-PGO"
// RUN: %libomptarget-compile-generic -fprofile-instr-generate-gpu
-// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1
+// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
+// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
-// RUN: %target_triple.clang.profraw | %fcheck-generic \
-// RUN: --check-prefix="CLANG-PGO"
+// RUN: %target_triple.%basename_t.clang.profraw | \
+// RUN: %fcheck-generic --check-prefix="CLANG-PGO"
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
diff --git a/offload/test/offloading/gpupgo/pgo2.c b/offload/test/offloading/gpupgo/pgo2.c
new file mode 100644
index 0000000000000..1819573d55024
--- /dev/null
+++ b/offload/test/offloading/gpupgo/pgo2.c
@@ -0,0 +1,73 @@
+// RUN: %libomptarget-compile-generic -fprofile-generate \
+// RUN: -fprofile-generate-gpu
+// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %basename_t.llvm.profraw | %fcheck-generic \
+// RUN: --check-prefix="LLVM-HOST"
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.%basename_t.llvm.profraw \
+// RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE"
+
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
+// RUN: -fprofile-instr-generate-gpu
+// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %basename_t.clang.profraw | %fcheck-generic \
+// RUN: --check-prefix="CLANG-HOST"
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.%basename_t.clang.profraw | \
+// RUN: %fcheck-generic --check-prefix="CLANG-DEV"
+
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// REQUIRES: pgo
+
+int main() {
+ int host_var = 0;
+ for (int i = 0; i < 20; i++) {
+ host_var += i;
+ }
+
+ int device_var = 1;
+#pragma omp target
+ for (int i = 0; i < 10; i++) {
+ device_var *= i;
+ }
+}
+
+// LLVM-HOST-LABEL: main:
+// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-HOST: Counters: 3
+// LLVM-HOST: Block counts: [20, 1, 0]
+
+// LLVM-HOST-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-HOST: Counters: 2
+// LLVM-HOST: Block counts: [0, 0]
+
+// LLVM-DEVICE-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-DEVICE: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-DEVICE: Counters: 3
+// LLVM-DEVICE: Block counts: [10, 1, 1]
+
+// CLANG-HOST-LABEL: main:
+// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-HOST: Counters: 2
+// CLANG-HOST: Function count: 1
+// CLANG-HOST: Block counts: [20]
+
+// CLANG-HOST-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-HOST: Counters: 2
+// CLANG-HOST: Function count: 0
+// CLANG-HOST: Block counts: [0]
+
+// CLANG-DEV-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-DEV: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-DEV: Counters: 2
+// CLANG-DEV: Function count: 0
+// CLANG-DEV: Block counts: [11]
>From 253013792cb7137b11893e701497e8f62143123a Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 31 May 2024 16:59:22 -0500
Subject: [PATCH 42/44] Add PGO dump debug option
---
offload/include/Shared/Environment.h | 1 +
offload/plugins-nextgen/common/src/PluginInterface.cpp | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index d141146b6bd5a..86f6d1c6ea2d3 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -30,6 +30,7 @@ enum class DeviceDebugKind : uint32_t {
FunctionTracing = 1U << 1,
CommonIssues = 1U << 2,
AllocationTracker = 1U << 3,
+ PGODump = 1U << 4,
};
struct DeviceEnvironmentTy {
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index a479235e2c36c..a68875ea7748e 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -839,6 +839,10 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
if (!ProfOrErr)
return ProfOrErr.takeError();
+ if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
+ uint32_t(DeviceDebugKind::PGODump))
+ ProfOrErr->dump();
+
// Write data to profiling file
if (auto Err = ProfOrErr->write()) {
consumeError(std::move(Err));
>From 79ceacb6559a3f6ecf3fd7ec1abf768ddeb97d13 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 31 May 2024 20:29:20 -0500
Subject: [PATCH 43/44] Tighten PGO test requirements
Require compiler-rt to be an enabled runtime
---
offload/test/CMakeLists.txt | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt
index 32df1e47afaeb..41ab339147791 100644
--- a/offload/test/CMakeLists.txt
+++ b/offload/test/CMakeLists.txt
@@ -12,10 +12,10 @@ else()
set(LIBOMPTARGET_DEBUG False)
endif()
-if (OPENMP_STANDALONE_BUILD)
- set(LIBOMPTARGET_TEST_GPU_PGO False)
-else()
+if (NOT OPENMP_STANDALONE_BUILD AND "compiler-rt" IN_LIST LLVM_ENABLE_RUNTIMES)
set(LIBOMPTARGET_TEST_GPU_PGO True)
+else()
+ set(LIBOMPTARGET_TEST_GPU_PGO False)
endif()
# Replace the space from user's input with ";" in case that CMake add escape
>From ff0dd62cf1b236f1373fd3b70ec2875c3719ca04 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 31 May 2024 22:07:20 -0500
Subject: [PATCH 44/44] Add note about PGO debug flag
---
openmp/docs/design/Runtimes.rst | 1 +
1 file changed, 1 insertion(+)
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index f8a8cb87e83e6..7fc697a838e22 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -1493,3 +1493,4 @@ debugging features are supported.
* Enable debugging assertions in the device. ``0x01``
* Enable diagnosing common problems during offloading . ``0x4``
* Enable device malloc statistics (amdgpu only). ``0x8``
+ * Dump device PGO counters (only if PGO on GPU is enabled). ``0x10``
More information about the cfe-commits
mailing list