[Openmp-commits] [clang] [llvm] [openmp] [PGO][OpenMP] Instrumentation for GPU devices (PR #76587)
Ethan Luis McDonough via Openmp-commits
openmp-commits at lists.llvm.org
Fri Feb 16 11:13:58 PST 2024
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/76587
>From 530eb982b9770190377bb0bd09c5cb715f34d484 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 15 Dec 2023 20:38:38 -0600
Subject: [PATCH 01/17] Add profiling functions to libomptarget
---
.../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +++
openmp/libomptarget/DeviceRTL/CMakeLists.txt | 2 ++
.../DeviceRTL/include/Profiling.h | 21 +++++++++++++++++++
.../libomptarget/DeviceRTL/src/Profiling.cpp | 19 +++++++++++++++++
4 files changed, 45 insertions(+)
create mode 100644 openmp/libomptarget/DeviceRTL/include/Profiling.h
create mode 100644 openmp/libomptarget/DeviceRTL/src/Profiling.cpp
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index d22d2a8e948b00..1d887d5cb58127 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -503,6 +503,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
__OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
+__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr)
+__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64)
+
__OMP_RTL(__last, false, Void, )
#undef __OMP_RTL
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 1ce3e1e40a80ab..55ee15d068c67b 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -89,6 +89,7 @@ set(include_files
${include_directory}/Interface.h
${include_directory}/LibC.h
${include_directory}/Mapping.h
+ ${include_directory}/Profiling.h
${include_directory}/State.h
${include_directory}/Synchronization.h
${include_directory}/Types.h
@@ -104,6 +105,7 @@ set(src_files
${source_directory}/Mapping.cpp
${source_directory}/Misc.cpp
${source_directory}/Parallelism.cpp
+ ${source_directory}/Profiling.cpp
${source_directory}/Reduction.cpp
${source_directory}/State.cpp
${source_directory}/Synchronization.cpp
diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h
new file mode 100644
index 00000000000000..68c7744cd60752
--- /dev/null
+++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h
@@ -0,0 +1,21 @@
+//===-------- Profiling.h - OpenMP interface ---------------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPTARGET_DEVICERTL_PROFILING_H
+#define OMPTARGET_DEVICERTL_PROFILING_H
+
+extern "C" {
+
+void __llvm_profile_register_function(void *ptr);
+void __llvm_profile_register_names_function(void *ptr, long int i);
+}
+
+#endif
diff --git a/openmp/libomptarget/DeviceRTL/src/Profiling.cpp b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp
new file mode 100644
index 00000000000000..799477f5e47d27
--- /dev/null
+++ b/openmp/libomptarget/DeviceRTL/src/Profiling.cpp
@@ -0,0 +1,19 @@
+//===------- Profiling.cpp ---------------------------------------- C++ ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Profiling.h"
+
+#pragma omp begin declare target device_type(nohost)
+
+extern "C" {
+
+void __llvm_profile_register_function(void *ptr) {}
+void __llvm_profile_register_names_function(void *ptr, long int i) {}
+}
+
+#pragma omp end declare target
>From fb067d4ffe604fd68cf90b705db1942bce49dbb1 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Sat, 16 Dec 2023 01:18:41 -0600
Subject: [PATCH 02/17] Fix PGO instrumentation for GPU targets
---
clang/lib/CodeGen/CodeGenPGO.cpp | 10 ++++++++--
.../lib/Transforms/Instrumentation/InstrProfiling.cpp | 11 ++++++++---
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 81bf8ea696b164..edae6885b528ac 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -959,8 +959,14 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
unsigned Counter = (*RegionCounterMap)[S];
- llvm::Value *Args[] = {FuncNameVar,
- Builder.getInt64(FunctionHash),
+ // Make sure that pointer to global is passed in with zero addrspace
+ // This is relevant during GPU profiling
+ auto *I8Ty = llvm::Type::getInt8Ty(CGM.getLLVMContext());
+ auto *I8PtrTy = llvm::PointerType::getUnqual(I8Ty);
+ auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ FuncNameVar, I8PtrTy);
+
+ llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
Builder.getInt32(Counter), StepV};
if (!StepV)
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index fe5a0578bd9721..d2cb8155c17967 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1658,10 +1658,13 @@ void InstrLowerer::emitRegistration() {
IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
for (Value *Data : CompilerUsedVars)
if (!isa<Function>(Data))
- IRB.CreateCall(RuntimeRegisterF, Data);
+ // Check for addrspace cast when profiling GPU
+ IRB.CreateCall(RuntimeRegisterF,
+ IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
for (Value *Data : UsedVars)
if (Data != NamesVar && !isa<Function>(Data))
- IRB.CreateCall(RuntimeRegisterF, Data);
+ IRB.CreateCall(RuntimeRegisterF,
+ IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
if (NamesVar) {
Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
@@ -1670,7 +1673,9 @@ void InstrLowerer::emitRegistration() {
auto *NamesRegisterF =
Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
getInstrProfNamesRegFuncName(), M);
- IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
+ IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
+ NamesVar, VoidPtrTy),
+ IRB.getInt64(NamesSize)});
}
IRB.CreateRetVoid();
>From 7a0e0efa178cc4de6a22a8f5cc3f53cd1c81ea3a Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 21 Dec 2023 00:25:46 -0600
Subject: [PATCH 03/17] Change global visibility on GPU targets
---
llvm/include/llvm/ProfileData/InstrProf.h | 4 ++++
llvm/lib/ProfileData/InstrProf.cpp | 17 +++++++++++++++--
.../Instrumentation/InstrProfiling.cpp | 15 +++++++++++----
3 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 288dc71d756aee..bf9899d867e3dd 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -171,6 +171,10 @@ inline StringRef getInstrProfCounterBiasVarName() {
/// Return the marker used to separate PGO names during serialization.
inline StringRef getInstrProfNameSeparator() { return "\01"; }
+/// Determines whether module targets a GPU eligable for PGO
+/// instrumentation
+bool isGPUProfTarget(const Module &M);
+
/// Return the modified name for function \c F suitable to be
/// used the key for profile lookup. Variable \c InLTO indicates if this
/// is called in LTO optimization passes.
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 649d814cfd9de0..0d6717aeb0142c 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -410,13 +410,22 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
return VarName;
}
+bool isGPUProfTarget(const Module &M) {
+ const auto &triple = M.getTargetTriple();
+ return triple.rfind("nvptx", 0) == 0 || triple.rfind("amdgcn", 0) == 0 ||
+ triple.rfind("r600", 0) == 0;
+}
+
GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef PGOFuncName) {
+ // Ensure profiling variables on GPU are visible to be read from host
+ if (isGPUProfTarget(M))
+ Linkage = GlobalValue::ExternalLinkage;
// We generally want to match the function's linkage, but available_externally
// and extern_weak both have the wrong semantics, and anything that doesn't
// need to link across compilation units doesn't need to be visible at all.
- if (Linkage == GlobalValue::ExternalWeakLinkage)
+ else if (Linkage == GlobalValue::ExternalWeakLinkage)
Linkage = GlobalValue::LinkOnceAnyLinkage;
else if (Linkage == GlobalValue::AvailableExternallyLinkage)
Linkage = GlobalValue::LinkOnceODRLinkage;
@@ -430,8 +439,12 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
new GlobalVariable(M, Value->getType(), true, Linkage, Value,
getPGOFuncNameVarName(PGOFuncName, Linkage));
+ // If the target is a GPU, make the symbol protected so it can
+ // be read from the host device
+ if (isGPUProfTarget(M))
+ FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
// Hide the symbol so that we correctly get a copy for each executable.
- if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
+ else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
return FuncNameVar;
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index d2cb8155c17967..3b582b65190808 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1481,6 +1481,10 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
+ if (isGPUProfTarget(M)) {
+ Linkage = GlobalValue::ExternalLinkage;
+ Visibility = GlobalValue::ProtectedVisibility;
+ }
// If the data variable is not referenced by code (if we don't emit
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
// data variable live under linker GC, the data variable can be private. This
@@ -1492,9 +1496,9 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
// If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
// that other copies must have the same CFG and cannot have value profiling.
// If no hash suffix, other profd copies may be referenced by code.
- if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
- (TT.isOSBinFormatELF() ||
- (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
+ else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
+ (TT.isOSBinFormatELF() ||
+ (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
Linkage = GlobalValue::PrivateLinkage;
Visibility = GlobalValue::DefaultVisibility;
}
@@ -1696,7 +1700,10 @@ bool InstrLowerer::emitRuntimeHook() {
auto *Var =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
- Var->setVisibility(GlobalValue::HiddenVisibility);
+ if (isGPUProfTarget(M))
+ Var->setVisibility(GlobalValue::ProtectedVisibility);
+ else
+ Var->setVisibility(GlobalValue::HiddenVisibility);
if (TT.isOSBinFormatELF() && !TT.isPS()) {
// Mark the user variable as used so that it isn't stripped out.
>From fddc07908ed9aa698fe3250ddbfc5621ab4d049d Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 22 Dec 2023 23:43:29 -0600
Subject: [PATCH 04/17] Make names global public on GPU
---
llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 3b582b65190808..61fba7be3ee0ee 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1621,6 +1621,13 @@ void InstrLowerer::emitNameData() {
NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());
+
+ // Make names variable public if current target is a GPU
+ if (isGPUProfTarget(M)) {
+ NamesVar->setLinkage(GlobalValue::ExternalLinkage);
+ NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
+ }
+
NamesSize = CompressedNameStr.size();
setGlobalVariableLargeSection(TT, *NamesVar);
NamesVar->setSection(
>From e9db03c70bf79f4f4ddad4b48a5aa63a37e0d4f6 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 29 Dec 2023 12:54:50 -0600
Subject: [PATCH 05/17] Read and print GPU device PGO globals
---
.../common/include/GlobalHandler.h | 27 ++++++
.../common/src/GlobalHandler.cpp | 82 +++++++++++++++++++
.../common/src/PluginInterface.cpp | 14 ++++
3 files changed, 123 insertions(+)
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
index fa079ac9660ee0..a82cd536487653 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
@@ -14,9 +14,11 @@
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#include <string>
+#include <vector>
#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "Shared/Debug.h"
#include "Shared/Utils.h"
@@ -58,6 +60,22 @@ class GlobalTy {
void setPtr(void *P) { Ptr = P; }
};
+typedef void *IntPtrT;
+struct __llvm_profile_data {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+/// PGO profiling data extracted from a GPU device
+struct GPUProfGlobals {
+ std::string names;
+ std::vector<std::vector<int64_t>> counts;
+ std::vector<__llvm_profile_data> data;
+ Triple targetTriple;
+
+ void dump() const;
+};
+
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
template <typename Ty> class StaticGlobalTy : public GlobalTy {
Ty Data;
@@ -172,6 +190,15 @@ class GenericGlobalHandlerTy {
return moveGlobalBetweenDeviceAndHost(Device, Image, HostGlobal,
/* D2H */ false);
}
+
+ /// Checks whether a given image contains profiling globals.
+ bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);
+
+ /// Reads profiling data from a GPU image to supplied profdata struct.
+ /// Iterates through the image symbol table and stores global values
+ /// with profiling prefixes.
+ Expected<GPUProfGlobals> readProfilingGlobals(GenericDeviceTy &Device,
+ DeviceImageTy &Image);
};
} // namespace plugin
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index 3a272e228c7dfe..5dd5daec468ca5 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -176,3 +176,85 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
return Plugin::success();
}
+
+bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
+ DeviceImageTy &Image) {
+ GlobalTy global(getInstrProfNamesVarName().str(), 0);
+ if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) {
+ consumeError(std::move(Err));
+ return false;
+ }
+ return true;
+}
+
+Expected<GPUProfGlobals>
+GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
+ DeviceImageTy &Image) {
+ GPUProfGlobals profdata;
+ const auto *elf = getOrCreateELFObjectFile(Device, Image);
+ profdata.targetTriple = elf->makeTriple();
+ // Iterate through
+ for (auto &sym : elf->symbols()) {
+ if (auto name = sym.getName()) {
+ // Check if given current global is a profiling global based
+ // on name
+ if (name->equals(getInstrProfNamesVarName())) {
+ // Read in profiled function names
+ std::vector<char> chars(sym.getSize() / sizeof(char), ' ');
+ GlobalTy NamesGlobal(name->str(), sym.getSize(), chars.data());
+ if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
+ return Err;
+ std::string names(chars.begin(), chars.end());
+ profdata.names = std::move(names);
+ } else if (name->starts_with(getInstrProfCountersVarPrefix())) {
+ // Read global variable profiling counts
+ std::vector<int64_t> counts(sym.getSize() / sizeof(int64_t), 0);
+ GlobalTy CountGlobal(name->str(), sym.getSize(), counts.data());
+ if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
+ return Err;
+ profdata.counts.push_back(std::move(counts));
+ } else if (name->starts_with(getInstrProfDataVarPrefix())) {
+ // Read profiling data for this global variable
+ __llvm_profile_data data{};
+ GlobalTy DataGlobal(name->str(), sym.getSize(), &data);
+ if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
+ return Err;
+ profdata.data.push_back(std::move(data));
+ }
+ }
+ }
+ return profdata;
+}
+
+void GPUProfGlobals::dump() const {
+ llvm::outs() << "======= GPU Profile =======\nTarget: " << targetTriple.str()
+ << "\n";
+
+ llvm::outs() << "======== Counters =========\n";
+ for (const auto &count : counts) {
+ llvm::outs() << "[";
+ for (size_t i = 0; i < count.size(); i++) {
+ if (i == 0)
+ llvm::outs() << " ";
+ llvm::outs() << count[i] << " ";
+ }
+ llvm::outs() << "]\n";
+ }
+
+ llvm::outs() << "========== Data ===========\n";
+ for (const auto &d : data) {
+ llvm::outs() << "{ ";
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
+ llvm::outs() << d.Name << " ";
+#include "llvm/ProfileData/InstrProfData.inc"
+ llvm::outs() << " }\n";
+ }
+
+ llvm::outs() << "======== Functions ========\n";
+ InstrProfSymtab symtab;
+ if (Error Err = symtab.create(StringRef(names))) {
+ consumeError(std::move(Err));
+ }
+ symtab.dumpNames(llvm::outs());
+ llvm::outs() << "===========================\n";
+}
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 3c7d1ca8998787..84ed90f03f84f1 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -811,6 +811,20 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
DeviceMemoryPoolTracking.AllocationMax);
}
+ for (auto *Image : LoadedImages) {
+ GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
+ if (!Handler.hasProfilingGlobals(*this, *Image))
+ continue;
+
+ GPUProfGlobals profdata;
+ auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
+ if (!ProfOrErr)
+ return ProfOrErr.takeError();
+
+ // TODO: write data to profiling file
+ ProfOrErr->dump();
+ }
+
// Delete the memory manager before deinitializing the device. Otherwise,
// we may delete device allocations after the device is deinitialized.
if (MemoryManager)
>From e4687605d1a6ca932312025826db09dba84845a3 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 3 Jan 2024 17:06:15 -0600
Subject: [PATCH 06/17] Fix rebase bug
---
.../plugins-nextgen/common/src/GlobalHandler.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index cb71b61f4a9c4f..86742d0f77a2fe 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -178,10 +178,12 @@ Expected<GPUProfGlobals>
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
GPUProfGlobals profdata;
- const auto *elf = getOrCreateELFObjectFile(Device, Image);
- profdata.targetTriple = elf->makeTriple();
- // Iterate through
- for (auto &sym : elf->symbols()) {
+ auto ELFObj = getELFObjectFile(Image);
+ if (!ELFObj)
+ return ELFObj.takeError();
+ profdata.targetTriple = ELFObj->makeTriple();
+ // Iterate through elf symbols
+ for (auto &sym : ELFObj->symbols()) {
if (auto name = sym.getName()) {
// Check if given current global is a profiling global based
// on name
>From ec18ce94c227e1d43927955fa1c67360ecfcfca6 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 3 Jan 2024 17:10:19 -0600
Subject: [PATCH 07/17] Refactor portions to be more idiomatic
---
clang/lib/CodeGen/CodeGenPGO.cpp | 4 +---
llvm/lib/ProfileData/InstrProf.cpp | 5 ++---
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index edae6885b528ac..7bfcec43ee4c98 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -961,10 +961,8 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *I8Ty = llvm::Type::getInt8Ty(CGM.getLLVMContext());
- auto *I8PtrTy = llvm::PointerType::getUnqual(I8Ty);
auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- FuncNameVar, I8PtrTy);
+ FuncNameVar, llvm::PointerType::getUnqual(CGM.getLLVMContext()));
llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index cdcd6840bb5108..1d88da16a5ff9c 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -429,9 +429,8 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
}
bool isGPUProfTarget(const Module &M) {
- const auto &triple = M.getTargetTriple();
- return triple.rfind("nvptx", 0) == 0 || triple.rfind("amdgcn", 0) == 0 ||
- triple.rfind("r600", 0) == 0;
+ const auto &Triple = llvm::Triple(M.getTargetTriple());
+ return Triple.isAMDGPU() || Triple.isNVPTX();
}
GlobalVariable *createPGOFuncNameVar(Module &M,
>From 0872556f597056361b0a2c23cdd0be3d9745aef3 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 3 Jan 2024 17:18:47 -0600
Subject: [PATCH 08/17] Reformat DeviceRTL prof functions
---
openmp/libomptarget/DeviceRTL/include/Profiling.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/openmp/libomptarget/DeviceRTL/include/Profiling.h b/openmp/libomptarget/DeviceRTL/include/Profiling.h
index 68c7744cd60752..9efc1554c176bc 100644
--- a/openmp/libomptarget/DeviceRTL/include/Profiling.h
+++ b/openmp/libomptarget/DeviceRTL/include/Profiling.h
@@ -13,9 +13,8 @@
#define OMPTARGET_DEVICERTL_PROFILING_H
extern "C" {
-
-void __llvm_profile_register_function(void *ptr);
-void __llvm_profile_register_names_function(void *ptr, long int i);
+void __llvm_profile_register_function(void *Ptr);
+void __llvm_profile_register_names_function(void *Ptr, long int I);
}
#endif
>From 62f31d1c71b5d100f38d6dc584cc138b3904581b Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 9 Jan 2024 11:52:29 -0600
Subject: [PATCH 09/17] Style changes + catch name error
---
.../common/include/GlobalHandler.h | 16 ++--
.../common/src/GlobalHandler.cpp | 87 ++++++++++---------
2 files changed, 56 insertions(+), 47 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
index a803b3f76d8b25..755bb23a414e37 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
@@ -13,8 +13,7 @@
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
-#include <string>
-#include <vector>
+#include <type_traits>
#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -60,18 +59,19 @@ class GlobalTy {
void setPtr(void *P) { Ptr = P; }
};
-typedef void *IntPtrT;
+using IntPtrT = void *;
struct __llvm_profile_data {
-#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
+ std::remove_const<Type>::type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
/// PGO profiling data extracted from a GPU device
struct GPUProfGlobals {
- std::string names;
- std::vector<std::vector<int64_t>> counts;
- std::vector<__llvm_profile_data> data;
- Triple targetTriple;
+ SmallVector<uint8_t> NamesData;
+ SmallVector<SmallVector<int64_t>> Counts;
+ SmallVector<__llvm_profile_data> Data;
+ Triple TargetTriple;
void dump() const;
};
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index 86742d0f77a2fe..7cb672e7b26839 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Error.h"
#include <cstring>
+#include <string>
using namespace llvm;
using namespace omp;
@@ -177,73 +178,81 @@ bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
Expected<GPUProfGlobals>
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
- GPUProfGlobals profdata;
+ GPUProfGlobals DeviceProfileData;
auto ELFObj = getELFObjectFile(Image);
if (!ELFObj)
return ELFObj.takeError();
- profdata.targetTriple = ELFObj->makeTriple();
+ DeviceProfileData.TargetTriple = ELFObj->makeTriple();
+
// Iterate through elf symbols
- for (auto &sym : ELFObj->symbols()) {
- if (auto name = sym.getName()) {
- // Check if given current global is a profiling global based
- // on name
- if (name->equals(getInstrProfNamesVarName())) {
- // Read in profiled function names
- std::vector<char> chars(sym.getSize() / sizeof(char), ' ');
- GlobalTy NamesGlobal(name->str(), sym.getSize(), chars.data());
- if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
- return Err;
- std::string names(chars.begin(), chars.end());
- profdata.names = std::move(names);
- } else if (name->starts_with(getInstrProfCountersVarPrefix())) {
- // Read global variable profiling counts
- std::vector<int64_t> counts(sym.getSize() / sizeof(int64_t), 0);
- GlobalTy CountGlobal(name->str(), sym.getSize(), counts.data());
- if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
- return Err;
- profdata.counts.push_back(std::move(counts));
- } else if (name->starts_with(getInstrProfDataVarPrefix())) {
- // Read profiling data for this global variable
- __llvm_profile_data data{};
- GlobalTy DataGlobal(name->str(), sym.getSize(), &data);
- if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
- return Err;
- profdata.data.push_back(std::move(data));
- }
+ for (auto &Sym : ELFObj->symbols()) {
+ auto NameOrErr = Sym.getName();
+ if (!NameOrErr)
+ return ELFObj.takeError();
+
+ // Check if given current global is a profiling global based
+ // on name
+ if (NameOrErr->equals(getInstrProfNamesVarName())) {
+ // Read in profiled function names
+ DeviceProfileData.NamesData = SmallVector<uint8_t>(Sym.getSize(), 0);
+ GlobalTy NamesGlobal(NameOrErr->str(), Sym.getSize(),
+ DeviceProfileData.NamesData.data());
+ if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
+ return Err;
+ } else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) {
+ // Read global variable profiling counts
+ SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0);
+ GlobalTy CountGlobal(NameOrErr->str(), Sym.getSize(), Counts.data());
+ if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
+ return Err;
+ DeviceProfileData.Counts.push_back(std::move(Counts));
+ } else if (NameOrErr->starts_with(getInstrProfDataVarPrefix())) {
+ // Read profiling data for this global variable
+ __llvm_profile_data Data{};
+ GlobalTy DataGlobal(NameOrErr->str(), Sym.getSize(), &Data);
+ if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
+ return Err;
+ DeviceProfileData.Data.push_back(std::move(Data));
}
}
- return profdata;
+ return DeviceProfileData;
}
void GPUProfGlobals::dump() const {
- llvm::outs() << "======= GPU Profile =======\nTarget: " << targetTriple.str()
+ llvm::outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
<< "\n";
llvm::outs() << "======== Counters =========\n";
- for (const auto &count : counts) {
+ for (const auto &Count : Counts) {
llvm::outs() << "[";
- for (size_t i = 0; i < count.size(); i++) {
+ for (size_t i = 0; i < Count.size(); i++) {
if (i == 0)
llvm::outs() << " ";
- llvm::outs() << count[i] << " ";
+ llvm::outs() << Count[i] << " ";
}
llvm::outs() << "]\n";
}
llvm::outs() << "========== Data ===========\n";
- for (const auto &d : data) {
+ for (const auto &ProfData : Data) {
llvm::outs() << "{ ";
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
- llvm::outs() << d.Name << " ";
+ llvm::outs() << ProfData.Name << " ";
#include "llvm/ProfileData/InstrProfData.inc"
llvm::outs() << " }\n";
}
llvm::outs() << "======== Functions ========\n";
- InstrProfSymtab symtab;
- if (Error Err = symtab.create(StringRef(names))) {
+ std::string s;
+ s.reserve(NamesData.size());
+ for (uint8_t Name : NamesData) {
+ s.push_back((char)Name);
+ }
+
+ InstrProfSymtab Symtab;
+ if (Error Err = Symtab.create(StringRef(s))) {
consumeError(std::move(Err));
}
- symtab.dumpNames(llvm::outs());
+ Symtab.dumpNames(llvm::outs());
llvm::outs() << "===========================\n";
}
>From 0c4bbeb54d189c1461affd37853aa86c3e3ca7d8 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 17 Jan 2024 19:59:06 -0600
Subject: [PATCH 10/17] Add GPU PGO test
---
.../common/src/GlobalHandler.cpp | 2 +-
openmp/libomptarget/test/CMakeLists.txt | 6 +++
openmp/libomptarget/test/lit.cfg | 3 ++
openmp/libomptarget/test/lit.site.cfg.in | 2 +-
openmp/libomptarget/test/offloading/pgo1.c | 39 +++++++++++++++++++
5 files changed, 50 insertions(+), 2 deletions(-)
create mode 100644 openmp/libomptarget/test/offloading/pgo1.c
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index 7cb672e7b26839..e5eb653d022287 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -239,7 +239,7 @@ void GPUProfGlobals::dump() const {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
llvm::outs() << ProfData.Name << " ";
#include "llvm/ProfileData/InstrProfData.inc"
- llvm::outs() << " }\n";
+ llvm::outs() << "}\n";
}
llvm::outs() << "======== Functions ========\n";
diff --git a/openmp/libomptarget/test/CMakeLists.txt b/openmp/libomptarget/test/CMakeLists.txt
index a0ba233eaa5726..21233f3e252eb5 100644
--- a/openmp/libomptarget/test/CMakeLists.txt
+++ b/openmp/libomptarget/test/CMakeLists.txt
@@ -12,6 +12,12 @@ else()
set(LIBOMPTARGET_DEBUG False)
endif()
+if (OPENMP_STANDALONE_BUILD)
+ set(LIBOMPTARGET_TEST_GPU_PGO False)
+else()
+ set(LIBOMPTARGET_TEST_GPU_PGO True)
+endif()
+
# Replace the space from user's input with ";" in case that CMake add escape
# char into the lit command.
string(REPLACE " " ";" LIBOMPTARGET_LIT_ARG_LIST "${LIBOMPTARGET_LIT_ARGS}")
diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg
index 19c5e5c4572227..49743f9fed7f29 100644
--- a/openmp/libomptarget/test/lit.cfg
+++ b/openmp/libomptarget/test/lit.cfg
@@ -104,6 +104,9 @@ config.available_features.add(config.libomptarget_current_target)
if config.libomptarget_has_libc:
config.available_features.add('libc')
+if config.libomptarget_test_pgo:
+ config.available_features.add('pgo')
+
# Determine whether the test system supports unified memory.
# For CUDA, this is the case with compute capability 70 (Volta) or higher.
# For all other targets, we currently assume it is.
diff --git a/openmp/libomptarget/test/lit.site.cfg.in b/openmp/libomptarget/test/lit.site.cfg.in
index 2d638118838727..494d1636af304a 100644
--- a/openmp/libomptarget/test/lit.site.cfg.in
+++ b/openmp/libomptarget/test/lit.site.cfg.in
@@ -25,6 +25,6 @@ config.libomptarget_not = "@OPENMP_NOT_EXECUTABLE@"
config.libomptarget_debug = @LIBOMPTARGET_DEBUG@
config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@
config.libomptarget_has_libc = @LIBOMPTARGET_GPU_LIBC_SUPPORT@
-
+config.libomptarget_test_pgo = @LIBOMPTARGET_TEST_GPU_PGO@
# Let the main config do the real work.
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c
new file mode 100644
index 00000000000000..ca8a6f502a06aa
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/pgo1.c
@@ -0,0 +1,39 @@
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate -Xclang "-fprofile-instrument=clang"
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// REQUIRES: pgo
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+int test1(int a) { return a / 2; }
+int test2(int a) { return a * 2; }
+
+int main() {
+ int m = 2;
+#pragma omp target
+ for (int i = 0; i < 10; i++) {
+ m = test1(m);
+ for (int j = 0; j < 2; j++) {
+ m = test2(m);
+ }
+ }
+}
+
+// CHECK: ======== Counters =========
+// CHECK-NEXT: [ 0 11 20 ]
+// CHECK-NEXT: [ 10 ]
+// CHECK-NEXT: [ 20 ]
+// CHECK-NEXT: ========== Data ===========
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: ======== Functions ========
+// CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
+// CHECK-NEXT: test1
+// CHECK-NEXT: test2
>From c7ae2a74daa93b05058fcc9bba64e0734359362c Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 17 Jan 2024 23:12:27 -0600
Subject: [PATCH 11/17] Fix PGO test formatting
---
openmp/libomptarget/test/offloading/pgo1.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c
index ca8a6f502a06aa..389be19b670d76 100644
--- a/openmp/libomptarget/test/offloading/pgo1.c
+++ b/openmp/libomptarget/test/offloading/pgo1.c
@@ -1,4 +1,5 @@
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate -Xclang "-fprofile-instrument=clang"
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
+// RUN: -Xclang "-fprofile-instrument=clang"
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
// UNSUPPORTED: x86_64-pc-linux-gnu
@@ -30,9 +31,18 @@ int main() {
// CHECK-NEXT: [ 10 ]
// CHECK-NEXT: [ 20 ]
// CHECK-NEXT: ========== Data ===========
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
// CHECK-NEXT: ======== Functions ========
// CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
// CHECK-NEXT: test1
>From 8bb22072914bbb830e2788d117aedd0e0bab66ff Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 18 Jan 2024 23:15:55 -0600
Subject: [PATCH 12/17] Refactor visibility logic
---
llvm/lib/ProfileData/InstrProf.cpp | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 511571a3eed9b0..708ea63fd95e04 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -422,6 +422,16 @@ bool isGPUProfTarget(const Module &M) {
return Triple.isAMDGPU() || Triple.isNVPTX();
}
+void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) {
+ // If the target is a GPU, make the symbol protected so it can
+ // be read from the host device
+ if (isGPUProfTarget(M))
+ FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
+ // Hide the symbol so that we correctly get a copy for each executable.
+ else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
+ FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
+}
+
GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef PGOFuncName) {
@@ -445,14 +455,7 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
new GlobalVariable(M, Value->getType(), true, Linkage, Value,
getPGOFuncNameVarName(PGOFuncName, Linkage));
- // If the target is a GPU, make the symbol protected so it can
- // be read from the host device
- if (isGPUProfTarget(M))
- FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
- // Hide the symbol so that we correctly get a copy for each executable.
- else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
- FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
-
+ setPGOFuncVisibility(M, FuncNameVar);
return FuncNameVar;
}
>From 9f13943f64cb16162e44902d54de53a9b1229179 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 23 Jan 2024 18:33:58 -0600
Subject: [PATCH 13/17] Add LLVM instrumentation support
This PR formerly only supported -fprofile-instrument=clang. This commit adds support for -fprofile-instrument=llvm
---
.../Instrumentation/PGOInstrumentation.cpp | 12 +++-
openmp/libomptarget/test/offloading/pgo1.c | 72 +++++++++++++------
2 files changed, 59 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c20fc942eaf0d5..bbc8da78fd7baf 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -862,6 +862,10 @@ static void instrumentOneFunc(
auto Name = FuncInfo.FuncNameVar;
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
+ // Make sure that pointer to global is passed in with zero addrspace
+ // This is relevant during GPU profiling
+ auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ Name, llvm::PointerType::getUnqual(M->getContext()));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
@@ -869,7 +873,7 @@ static void instrumentOneFunc(
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
- {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
+ {NormalizedPtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
return;
}
@@ -887,7 +891,8 @@ static void instrumentOneFunc(
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
- {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
+ {NormalizedPtr, CFGHash, Builder.getInt32(NumCounters),
+ Builder.getInt32(I)});
I += PGOBlockCoverage ? 8 : 1;
}
@@ -901,7 +906,8 @@ static void instrumentOneFunc(
Intrinsic::getDeclaration(M, PGOBlockCoverage
? Intrinsic::instrprof_cover
: Intrinsic::instrprof_increment),
- {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
+ {NormalizedPtr, CFGHash, Builder.getInt32(NumCounters),
+ Builder.getInt32(I++)});
}
// Now instrument select instructions:
diff --git a/openmp/libomptarget/test/offloading/pgo1.c b/openmp/libomptarget/test/offloading/pgo1.c
index 389be19b670d76..d95793b508dcfc 100644
--- a/openmp/libomptarget/test/offloading/pgo1.c
+++ b/openmp/libomptarget/test/offloading/pgo1.c
@@ -1,6 +1,11 @@
// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
// RUN: -Xclang "-fprofile-instrument=clang"
-// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
+// RUN: --check-prefix="CLANG-PGO"
+// RUN: %libomptarget-compile-generic -fprofile-generate \
+// RUN: -Xclang "-fprofile-instrument=llvm"
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
+// RUN: --check-prefix="LLVM-PGO"
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
@@ -26,24 +31,47 @@ int main() {
}
}
-// CHECK: ======== Counters =========
-// CHECK-NEXT: [ 0 11 20 ]
-// CHECK-NEXT: [ 10 ]
-// CHECK-NEXT: [ 20 ]
-// CHECK-NEXT: ========== Data ===========
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CHECK-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CHECK-NEXT: ======== Functions ========
-// CHECK-NEXT: pgo1.c:__omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
-// CHECK-NEXT: test1
-// CHECK-NEXT: test2
+// CLANG-PGO: ======== Counters =========
+// CLANG-PGO-NEXT: [ 0 11 20 ]
+// CLANG-PGO-NEXT: [ 10 ]
+// CLANG-PGO-NEXT: [ 20 ]
+// CLANG-PGO-NEXT: ========== Data ===========
+// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// CLANG-PGO-NEXT: ======== Functions ========
+// CLANG-PGO-NEXT: pgo1.c:
+// CLANG-PGO-SAME: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
+// CLANG-PGO-NEXT: test1
+// CLANG-PGO-NEXT: test2
+
+// LLVM-PGO: ======== Counters =========
+// LLVM-PGO-NEXT: [ 20 ]
+// LLVM-PGO-NEXT: [ 10 ]
+// LLVM-PGO-NEXT: [ 20 10 1 1 ]
+// LLVM-PGO-NEXT: ========== Data ===========
+// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
+// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
+// LLVM-PGO-NEXT: ======== Functions ========
+// LLVM-PGO-NEXT: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
+// LLVM-PGO-NEXT: test1
+// LLVM-PGO-NEXT: test2
>From 0606f0dd1b32ef9ebe138bbc964b3921e22d95d1 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 14 Feb 2024 01:46:55 -0600
Subject: [PATCH 14/17] Use explicit addrspace instead of unqual
---
clang/lib/CodeGen/CodeGenPGO.cpp | 2 +-
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index e084dda879cbc0..4c75a01222d304 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1103,7 +1103,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- FuncNameVar, llvm::PointerType::getUnqual(CGM.getLLVMContext()));
+ FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index bbc8da78fd7baf..c63b3e4ecf786a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -865,7 +865,7 @@ static void instrumentOneFunc(
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- Name, llvm::PointerType::getUnqual(M->getContext()));
+ Name, llvm::PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
>From c1f9be321678766525141214aaab74636cafbc2c Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 15 Feb 2024 19:10:09 -0600
Subject: [PATCH 15/17] Remove redundant namespaces
---
.../Instrumentation/PGOInstrumentation.cpp | 4 +--
.../common/src/GlobalHandler.cpp | 26 +++++++++----------
2 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c63b3e4ecf786a..3058e577738fda 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -864,8 +864,8 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- Name, llvm::PointerType::get(M->getContext(), 0));
+ auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ Name, PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index e5eb653d022287..ae270c60804d26 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -219,30 +219,30 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
}
void GPUProfGlobals::dump() const {
- llvm::outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
+ outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
<< "\n";
- llvm::outs() << "======== Counters =========\n";
+ outs() << "======== Counters =========\n";
for (const auto &Count : Counts) {
- llvm::outs() << "[";
+ outs() << "[";
for (size_t i = 0; i < Count.size(); i++) {
if (i == 0)
- llvm::outs() << " ";
- llvm::outs() << Count[i] << " ";
+ outs() << " ";
+ outs() << Count[i] << " ";
}
- llvm::outs() << "]\n";
+ outs() << "]\n";
}
- llvm::outs() << "========== Data ===========\n";
+ outs() << "========== Data ===========\n";
for (const auto &ProfData : Data) {
- llvm::outs() << "{ ";
+ outs() << "{ ";
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
- llvm::outs() << ProfData.Name << " ";
+ outs() << ProfData.Name << " ";
#include "llvm/ProfileData/InstrProfData.inc"
- llvm::outs() << "}\n";
+ outs() << "}\n";
}
- llvm::outs() << "======== Functions ========\n";
+ outs() << "======== Functions ========\n";
std::string s;
s.reserve(NamesData.size());
for (uint8_t Name : NamesData) {
@@ -253,6 +253,6 @@ void GPUProfGlobals::dump() const {
if (Error Err = Symtab.create(StringRef(s))) {
consumeError(std::move(Err));
}
- Symtab.dumpNames(llvm::outs());
- llvm::outs() << "===========================\n";
+ Symtab.dumpNames(outs());
+ outs() << "===========================\n";
}
>From 6a3ae407e69e7524f0f808329c534f8352ee1779 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 15 Feb 2024 19:15:15 -0600
Subject: [PATCH 16/17] Clang format
---
.../libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index ae270c60804d26..1fce2448922624 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -220,7 +220,7 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
void GPUProfGlobals::dump() const {
outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
- << "\n";
+ << "\n";
outs() << "======== Counters =========\n";
for (const auto &Count : Counts) {
>From 6866862d459e3c3fa65fae8ae639ddc3ff735252 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Fri, 16 Feb 2024 13:13:39 -0600
Subject: [PATCH 17/17] Use getAddrSpaceCast
Replace getPointerBitCastOrAddrSpaceCast with getAddrSpaceCast and allow no-op getAddrSpaceCast calls when types are identical
---
clang/lib/CodeGen/CodeGenPGO.cpp | 2 +-
llvm/lib/IR/Constants.cpp | 4 ++++
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2 +-
3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 8f52018445d2b0..baceeba8380ddb 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1099,7 +1099,7 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ auto *NormalizedPtr = llvm::ConstantExpr::getAddrSpaceCast(
FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
llvm::Value *Args[] = {NormalizedPtr, Builder.getInt64(FunctionHash),
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index a38b912164b130..2d89c5bbd4a4c2 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2067,6 +2067,10 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy,
Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
+ // Skip cast if types are identical
+ if (C->getType() == DstTy)
+ return C;
+
assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) &&
"Invalid constantexpr addrspacecast!");
return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3058e577738fda..c0be71aa4cc004 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -864,7 +864,7 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
- auto *NormalizedPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ auto *NormalizedPtr = ConstantExpr::getAddrSpaceCast(
Name, PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
More information about the Openmp-commits
mailing list