[clang] [compiler-rt] [llvm] [PGO][Offload] Allow PGO flags to be used on GPU targets (PR #94268)
Ethan Luis McDonough via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 10:05:32 PDT 2025
https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/94268
>From 3a2047c273d948d035b50eb486b772d5b3bdc401 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 18 Mar 2025 16:20:14 -0500
Subject: [PATCH 1/5] [PGO][Offload] Allow PGO flags to be used on GPU targets
---
clang/lib/Driver/ToolChains/Clang.cpp | 6 +-
clang/test/Driver/cuda-no-pgo-or-coverage.cu | 33 --------
compiler-rt/include/profile/InstrProfData.inc | 2 +-
compiler-rt/lib/profile/InstrProfiling.h | 3 +-
.../lib/profile/InstrProfilingBuffer.c | 3 +-
compiler-rt/lib/profile/InstrProfilingFile.c | 22 +++--
.../lib/profile/InstrProfilingInternal.h | 3 +-
.../lib/profile/InstrProfilingWriter.c | 20 ++---
.../llvm/ProfileData/InstrProfData.inc | 2 +-
.../Instrumentation/PGOInstrumentation.cpp | 5 +-
.../llvm-profdata/binary-ids-padding.test | 2 +-
...alformed-not-space-for-another-header.test | 2 +-
.../malformed-num-counters-zero.test | 2 +-
.../malformed-ptr-to-counter-array.test | 2 +-
.../common/include/GlobalHandler.h | 6 +-
.../common/src/GlobalHandler.cpp | 18 +++-
offload/test/offloading/gpupgo/pgo1.c | 84 +++++++++++++++++++
offload/test/offloading/gpupgo/pgo2.c | 76 +++++++++++++++++
offload/test/offloading/pgo1.c | 66 ---------------
19 files changed, 220 insertions(+), 137 deletions(-)
delete mode 100644 clang/test/Driver/cuda-no-pgo-or-coverage.cu
create mode 100644 offload/test/offloading/gpupgo/pgo1.c
create mode 100644 offload/test/offloading/gpupgo/pgo2.c
delete mode 100644 offload/test/offloading/pgo1.c
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 1012128085c7a..e0f1206496486 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6387,11 +6387,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
options::OPT_fno_convergent_functions);
- // NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
- // for sampling, overhead of call arc collection is way too high and there's
- // no way to collect the output.
- if (!Triple.isNVPTX() && !Triple.isAMDGCN())
- addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
+ addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
diff --git a/clang/test/Driver/cuda-no-pgo-or-coverage.cu b/clang/test/Driver/cuda-no-pgo-or-coverage.cu
deleted file mode 100644
index b84587e1e182b..0000000000000
--- a/clang/test/Driver/cuda-no-pgo-or-coverage.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-// Check that profiling/coverage arguments doen't get passed down to device-side
-// compilation.
-//
-//
-// XRUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
-// XRUN: -fprofile-generate %s 2>&1 | \
-// XRUN: FileCheck --check-prefixes=CHECK,PROF %s
-//
-// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
-// RUN: -fprofile-instr-generate %s 2>&1 | \
-// RUN: FileCheck --check-prefixes=CHECK,PROF %s
-//
-// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
-// RUN: -coverage %s 2>&1 | \
-// RUN: FileCheck --check-prefixes=CHECK,GCOV %s
-//
-// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
-// RUN: -ftest-coverage %s 2>&1 | \
-// RUN: FileCheck --check-prefixes=CHECK,GCOV %s
-//
-// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
-// RUN: -fprofile-instr-generate -fcoverage-mapping %s 2>&1 | \
-// RUN: FileCheck --check-prefixes=CHECK,PROF %s
-//
-//
-// CHECK-NOT: error: unsupported option '-fprofile
-// CHECK-NOT: error: invalid argument
-// CHECK-DAG: "-fcuda-is-device"
-// CHECK-NOT: "-f{{[^"/]*coverage.*}}"
-// CHECK-NOT: "-fprofile{{[^"]*}}"
-// CHECK: "-triple" "x86_64-unknown-linux-gnu"
-// PROF: "-fprofile{{.*}}"
-// GCOV: "-coverage-notes-file=
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 2cdfea9a579a4..d51b58386f168 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
#define INSTR_PROF_DATA_DEFINED
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index 77c8d6c79322d..a90558fdcfbbf 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -310,7 +310,8 @@ int __llvm_write_custom_profile(const char *Target,
const __llvm_profile_data *DataEnd,
const char *CountersBegin,
const char *CountersEnd, const char *NamesBegin,
- const char *NamesEnd);
+ const char *NamesEnd,
+ const uint64_t *VersionOverride);
/*!
* This variable is defined in InstrProfilingRuntime.cpp as a hidden
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index 1c451d7ec7563..b406e8db74f3f 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -252,5 +252,6 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
&BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd,
/*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL,
- /*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0);
+ /*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0,
+ __llvm_profile_get_version());
}
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index 4667c02892505..19467429cf4c3 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1273,10 +1273,13 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
return 0;
}
-COMPILER_RT_USED int __llvm_write_custom_profile(
- const char *Target, const __llvm_profile_data *DataBegin,
- const __llvm_profile_data *DataEnd, const char *CountersBegin,
- const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
+int __llvm_write_custom_profile(const char *Target,
+ const __llvm_profile_data *DataBegin,
+ const __llvm_profile_data *DataEnd,
+ const char *CountersBegin,
+ const char *CountersEnd, const char *NamesBegin,
+ const char *NamesEnd,
+ const uint64_t *VersionOverride) {
int ReturnValue = 0, FilenameLength, TargetLength;
char *FilenameBuf, *TargetFilename;
const char *Filename;
@@ -1358,10 +1361,15 @@ COMPILER_RT_USED int __llvm_write_custom_profile(
ProfDataWriter fileWriter;
initFileWriter(&fileWriter, OutputFile);
+ uint64_t Version = __llvm_profile_get_version();
+ if (VersionOverride)
+ Version = *VersionOverride;
+
/* Write custom data to the file */
- ReturnValue = lprofWriteDataImpl(
- &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
- lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
+ ReturnValue =
+ lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
+ CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
+ NULL, NULL, NULL, NamesBegin, NamesEnd, 0, Version);
closeFileObject(OutputFile);
// Restore SIGKILL.
diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h
index b100343ca04f9..03df71828b91d 100644
--- a/compiler-rt/lib/profile/InstrProfilingInternal.h
+++ b/compiler-rt/lib/profile/InstrProfilingInternal.h
@@ -160,7 +160,8 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
VPDataReaderType *VPDataReader, const char *NamesBegin,
const char *NamesEnd, const VTableProfData *VTableBegin,
const VTableProfData *VTableEnd, const char *VNamesBegin,
- const char *VNamesEnd, int SkipNameDataWrite);
+ const char *VNamesEnd, int SkipNameDataWrite,
+ uint64_t Version);
/* Merge value profile data pointed to by SrcValueProfData into
* in-memory profile counters pointed by to DstData. */
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index 8816a71155511..bcd88b30d050d 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -254,21 +254,21 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
const char *VNamesBegin = __llvm_profile_begin_vtabnames();
const char *VNamesEnd = __llvm_profile_end_vtabnames();
+ uint64_t Version = __llvm_profile_get_version();
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
NamesBegin, NamesEnd, VTableBegin, VTableEnd,
- VNamesBegin, VNamesEnd, SkipNameDataWrite);
+ VNamesBegin, VNamesEnd, SkipNameDataWrite, Version);
}
-COMPILER_RT_VISIBILITY int
-lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
- const __llvm_profile_data *DataEnd,
- const char *CountersBegin, const char *CountersEnd,
- const char *BitmapBegin, const char *BitmapEnd,
- VPDataReaderType *VPDataReader, const char *NamesBegin,
- const char *NamesEnd, const VTableProfData *VTableBegin,
- const VTableProfData *VTableEnd, const char *VNamesBegin,
- const char *VNamesEnd, int SkipNameDataWrite) {
+COMPILER_RT_VISIBILITY int lprofWriteDataImpl(
+ ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
+ const __llvm_profile_data *DataEnd, const char *CountersBegin,
+ const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd,
+ VPDataReaderType *VPDataReader, const char *NamesBegin,
+ const char *NamesEnd, const VTableProfData *VTableBegin,
+ const VTableProfData *VTableEnd, const char *VNamesBegin,
+ const char *VNamesEnd, int SkipNameDataWrite, uint64_t Version) {
/* Calculate size of sections. */
const uint64_t DataSectionSize =
__llvm_profile_get_data_size(DataBegin, DataEnd);
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 2cdfea9a579a4..d51b58386f168 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
#define INSTR_PROF_DATA_DEFINED
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index a8055979acaa2..ea4be07d0c8c8 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -462,7 +462,10 @@ createIRLevelProfileFlagVar(Module &M,
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
- IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
+ if (isGPUProfTarget(M))
+ IRLevelVersionVariable->setVisibility(GlobalValue::ProtectedVisibility);
+ else
+ IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
Triple TT(M.getTargetTriple());
if (TT.supportsCOMDAT()) {
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
diff --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
index 292c582b45c52..f31aa15bfe6c9 100644
--- a/llvm/test/tools/llvm-profdata/binary-ids-padding.test
+++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
index 705e5efaf5875..44be2980bb2f2 100644
--- a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
+++ b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
index 157c13b926a7e..9af9d65a6bdba 100644
--- a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
+++ b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
index 83cf76f68fb63..49c5ae9b0931d 100644
--- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
+++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index e030ab9e6b61f..5c763d6c71726 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -13,6 +13,7 @@
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
+#include <optional>
#include <type_traits>
#include "llvm/ADT/DenseMap.h"
@@ -67,15 +68,16 @@ extern "C" {
extern int __attribute__((weak)) __llvm_write_custom_profile(
const char *Target, const __llvm_profile_data *DataBegin,
const __llvm_profile_data *DataEnd, const char *CountersBegin,
- const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
+ const char *CountersEnd, const char *NamesBegin, const char *NamesEnd,
+ const uint64_t *VersionOverride);
}
-
/// PGO profiling data extracted from a GPU device
struct GPUProfGlobals {
SmallVector<int64_t> Counts;
SmallVector<__llvm_profile_data> Data;
SmallVector<uint8_t> NamesData;
Triple TargetTriple;
+ std::optional<uint64_t> Version;
void dump() const;
Error write() const;
diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
index 8783490831e25..9b9233c95e567 100644
--- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -16,6 +16,7 @@
#include "Shared/Utils.h"
+#include "llvm/ProfileData/InstrProfData.inc"
#include "llvm/Support/Error.h"
#include <cstring>
@@ -214,6 +215,13 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
return Err;
DeviceProfileData.Data.push_back(std::move(Data));
+ } else if (*NameOrErr == INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)) {
+ uint64_t RawVersionData;
+ GlobalTy RawVersionGlobal(NameOrErr->str(), Sym.getSize(),
+ &RawVersionData);
+ if (auto Err = readGlobalFromDevice(Device, Image, RawVersionGlobal))
+ return Err;
+ DeviceProfileData.Version = RawVersionData;
}
}
return DeviceProfileData;
@@ -265,7 +273,7 @@ void GPUProfGlobals::dump() const {
}
Error GPUProfGlobals::write() const {
- if (!__llvm_write_custom_profile)
+ if (__llvm_write_custom_profile == nullptr)
return Plugin::error("Could not find symbol __llvm_write_custom_profile. "
"The compiler-rt profiling library must be linked for "
"GPU PGO to work.");
@@ -274,6 +282,8 @@ Error GPUProfGlobals::write() const {
CountsSize = Counts.size() * sizeof(int64_t);
__llvm_profile_data *DataBegin, *DataEnd;
char *CountersBegin, *CountersEnd, *NamesBegin, *NamesEnd;
+ const uint64_t *VersionOverride =
+ Version.has_value() ? &Version.value() : nullptr;
// Initialize array of contiguous data. We need to make sure each section is
// contiguous so that the PGO library can compute deltas properly
@@ -295,9 +305,9 @@ Error GPUProfGlobals::write() const {
memcpy(NamesBegin, NamesData.data(), NamesData.size());
// Invoke compiler-rt entrypoint
- int result = __llvm_write_custom_profile(TargetTriple.str().c_str(),
- DataBegin, DataEnd, CountersBegin,
- CountersEnd, NamesBegin, NamesEnd);
+ int result = __llvm_write_custom_profile(
+ TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
+ CountersEnd, NamesBegin, NamesEnd, VersionOverride);
if (result != 0)
return Plugin::error("Error writing GPU PGO data to file");
diff --git a/offload/test/offloading/gpupgo/pgo1.c b/offload/test/offloading/gpupgo/pgo1.c
new file mode 100644
index 0000000000000..c8011cbae83c0
--- /dev/null
+++ b/offload/test/offloading/gpupgo/pgo1.c
@@ -0,0 +1,84 @@
+// RUN: %libomptarget-compile-generic -fcreate-profile \
+// RUN: -Xarch_device -fprofile-generate
+// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.%basename_t.llvm.profraw | \
+// RUN: %fcheck-generic --check-prefix="LLVM-PGO"
+
+// RUN: %libomptarget-compile-generic -fcreate-profile \
+// RUN: -Xarch_device -fprofile-instr-generate
+// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.%basename_t.clang.profraw | \
+// RUN: %fcheck-generic --check-prefix="CLANG-PGO"
+
+// REQUIRES: gpu
+// REQUIRES: pgo
+
+int test1(int a) { return a / 2; }
+int test2(int a) { return a * 2; }
+
+int main() {
+ int m = 2;
+#pragma omp target
+ for (int i = 0; i < 10; i++) {
+ m = test1(m);
+ for (int j = 0; j < 2; j++) {
+ m = test2(m);
+ }
+ }
+}
+
+// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 4
+// LLVM-PGO: Block counts: [20, 10, 2, 1]
+
+// LLVM-PGO-LABEL: test1:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Block counts: [10]
+
+// LLVM-PGO-LABEL: test2:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Block counts: [20]
+
+// LLVM-PGO-LABEL: Instrumentation level:
+// LLVM-PGO-SAME: IR
+// LLVM-PGO-SAME: entry_first = 0
+// LLVM-PGO-LABEL: Functions shown:
+// LLVM-PGO-SAME: 3
+// LLVM-PGO-LABEL: Maximum function count:
+// LLVM-PGO-SAME: 20
+// LLVM-PGO-LABEL: Maximum internal block count:
+// LLVM-PGO-SAME: 10
+
+// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 3
+// CLANG-PGO: Function count: 0
+// CLANG-PGO: Block counts: [11, 20]
+
+// CLANG-PGO-LABEL: test1:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 10
+// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: test2:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 20
+// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: Instrumentation level:
+// CLANG-PGO-SAME: Front-end
+// CLANG-PGO-LABEL: Functions shown:
+// CLANG-PGO-SAME: 3
+// CLANG-PGO-LABEL: Maximum function count:
+// CLANG-PGO-SAME: 20
+// CLANG-PGO-LABEL: Maximum internal block count:
+// CLANG-PGO-SAME: 20
diff --git a/offload/test/offloading/gpupgo/pgo2.c b/offload/test/offloading/gpupgo/pgo2.c
new file mode 100644
index 0000000000000..35dae74087d92
--- /dev/null
+++ b/offload/test/offloading/gpupgo/pgo2.c
@@ -0,0 +1,76 @@
+// RUN: %libomptarget-compile-generic -fprofile-generate
+// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %basename_t.llvm.profraw | %fcheck-generic \
+// RUN: --check-prefix="LLVM-HOST"
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.%basename_t.llvm.profraw \
+// RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE"
+
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate
+// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %basename_t.clang.profraw | %fcheck-generic \
+// RUN: --check-prefix="CLANG-HOST"
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.%basename_t.clang.profraw | \
+// RUN: %fcheck-generic --check-prefix="CLANG-DEV"
+
+// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate
+// RUN: env LLVM_PROFILE_FILE=%basename_t.nogpu.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %basename_t.nogpu.profraw | %fcheck-generic \
+// RUN: --check-prefix="LLVM-HOST"
+// RUN: not test -e %target_triple.%basename_t.nogpu.profraw
+
+// REQUIRES: gpu
+// REQUIRES: pgo
+
+int main() {
+ int host_var = 0;
+ for (int i = 0; i < 20; i++) {
+ host_var += i;
+ }
+
+ int device_var = 1;
+#pragma omp target
+ for (int i = 0; i < 10; i++) {
+ device_var *= i;
+ }
+}
+
+// LLVM-HOST-LABEL: main:
+// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-HOST: Counters: 3
+// LLVM-HOST: Block counts: [20, 1, 0]
+
+// LLVM-HOST-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-HOST: Counters: 2
+// LLVM-HOST: Block counts: [0, 0]
+
+// LLVM-DEVICE-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-DEVICE: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-DEVICE: Counters: 3
+// LLVM-DEVICE: Block counts: [10, 2, 1]
+
+// CLANG-HOST-LABEL: main:
+// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-HOST: Counters: 2
+// CLANG-HOST: Function count: 1
+// CLANG-HOST: Block counts: [20]
+
+// CLANG-HOST-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-HOST: Counters: 2
+// CLANG-HOST: Function count: 0
+// CLANG-HOST: Block counts: [0]
+
+// CLANG-DEV-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-DEV: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-DEV: Counters: 2
+// CLANG-DEV: Function count: 0
+// CLANG-DEV: Block counts: [11]
diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c
deleted file mode 100644
index 6fe4487ffb67f..0000000000000
--- a/offload/test/offloading/pgo1.c
+++ /dev/null
@@ -1,66 +0,0 @@
-// RUN: %libomptarget-compile-generic -fprofile-generate \
-// RUN: -Xclang "-fprofile-instrument=llvm"
-// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1
-// RUN: %profdata show --all-functions --counts \
-// RUN: %target_triple.llvm.profraw | %fcheck-generic \
-// RUN: --check-prefix="LLVM-PGO"
-
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
-// RUN: -Xclang "-fprofile-instrument=clang"
-// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1
-// RUN: %profdata show --all-functions --counts \
-// RUN: %target_triple.clang.profraw | %fcheck-generic \
-// RUN: --check-prefix="CLANG-PGO"
-
-// REQUIRES: gpu
-// REQUIRES: pgo
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-int test1(int a) { return a / 2; }
-int test2(int a) { return a * 2; }
-
-int main() {
- int m = 2;
-#pragma omp target
- for (int i = 0; i < 10; i++) {
- m = test1(m);
- for (int j = 0; j < 2; j++) {
- m = test2(m);
- }
- }
-}
-// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
-// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
-// LLVM-PGO: Counters: 4
-// LLVM-PGO: Block counts: [20, 10, 2, 1]
-
-// LLVM-PGO-LABEL: test1:
-// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
-// LLVM-PGO: Counters: 1
-// LLVM-PGO: Block counts: [10]
-
-// LLVM-PGO-LABEL: test2:
-// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
-// LLVM-PGO: Counters: 1
-// LLVM-PGO: Block counts: [20]
-
-// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
-// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
-// CLANG-PGO: Counters: 3
-// CLANG-PGO: Function count: 0
-// CLANG-PGO: Block counts: [11, 20]
-
-// CLANG-PGO-LABEL: test1:
-// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
-// CLANG-PGO: Counters: 1
-// CLANG-PGO: Function count: 10
-// CLANG-PGO: Block counts: []
-
-// CLANG-PGO-LABEL: test2:
-// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
-// CLANG-PGO: Counters: 1
-// CLANG-PGO: Function count: 20
-// CLANG-PGO: Block counts: []
>From 3fcadedd08c7a824ecc1d59f8334559be8157d55 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Tue, 18 Mar 2025 16:41:49 -0500
Subject: [PATCH 2/5] Revert == nullptr check to !
---
offload/plugins-nextgen/common/src/GlobalHandler.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
index 9b9233c95e567..89f59f56f09e8 100644
--- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -273,7 +273,7 @@ void GPUProfGlobals::dump() const {
}
Error GPUProfGlobals::write() const {
- if (__llvm_write_custom_profile == nullptr)
+ if (!__llvm_write_custom_profile)
return Plugin::error("Could not find symbol __llvm_write_custom_profile. "
"The compiler-rt profiling library must be linked for "
"GPU PGO to work.");
>From 298dafc5e9553eeb69e9a52b7e9367153258db24 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 19 Mar 2025 00:37:37 -0500
Subject: [PATCH 3/5] Fix version extraction
---
clang/lib/CodeGen/CodeGenPGO.cpp | 3 +++
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 7 ++++---
offload/plugins-nextgen/common/include/GlobalHandler.h | 3 +--
offload/plugins-nextgen/common/src/GlobalHandler.cpp | 4 +---
4 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 792373839107f..ff8b1339de966 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1357,6 +1357,9 @@ void CodeGenPGO::setProfileVersion(llvm::Module &M) {
IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility);
llvm::Triple TT(M.getTargetTriple());
+ if (TT.isAMDGPU() || TT.isNVPTX())
+ IRLevelVersionVariable->setVisibility(
+ llvm::GlobalValue::ProtectedVisibility);
if (TT.supportsCOMDAT()) {
IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage);
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index ea4be07d0c8c8..bc704b3f89c44 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -462,10 +462,11 @@ createIRLevelProfileFlagVar(Module &M,
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
+ IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
if (isGPUProfTarget(M))
- IRLevelVersionVariable->setVisibility(GlobalValue::ProtectedVisibility);
- else
- IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
+ IRLevelVersionVariable->setVisibility(
+ llvm::GlobalValue::ProtectedVisibility);
+
Triple TT(M.getTargetTriple());
if (TT.supportsCOMDAT()) {
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index 5c763d6c71726..6def53430a7c0 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -13,7 +13,6 @@
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
-#include <optional>
#include <type_traits>
#include "llvm/ADT/DenseMap.h"
@@ -77,7 +76,7 @@ struct GPUProfGlobals {
SmallVector<__llvm_profile_data> Data;
SmallVector<uint8_t> NamesData;
Triple TargetTriple;
- std::optional<uint64_t> Version;
+ uint64_t Version = INSTR_PROF_RAW_VERSION;
void dump() const;
Error write() const;
diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
index 89f59f56f09e8..35a70d8eff901 100644
--- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -282,8 +282,6 @@ Error GPUProfGlobals::write() const {
CountsSize = Counts.size() * sizeof(int64_t);
__llvm_profile_data *DataBegin, *DataEnd;
char *CountersBegin, *CountersEnd, *NamesBegin, *NamesEnd;
- const uint64_t *VersionOverride =
- Version.has_value() ? &Version.value() : nullptr;
// Initialize array of contiguous data. We need to make sure each section is
// contiguous so that the PGO library can compute deltas properly
@@ -307,7 +305,7 @@ Error GPUProfGlobals::write() const {
// Invoke compiler-rt entrypoint
int result = __llvm_write_custom_profile(
TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
- CountersEnd, NamesBegin, NamesEnd, VersionOverride);
+ CountersEnd, NamesBegin, NamesEnd, &Version);
if (result != 0)
return Plugin::error("Error writing GPU PGO data to file");
>From 0dd32c30b7ed5f0c1d749c848c9077e8144f835e Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 19 Mar 2025 02:32:38 -0500
Subject: [PATCH 4/5] Manually set Version instead of changing instprof macros
---
.../lib/profile/InstrProfilingWriter.c | 1 +
.../llvm/ProfileData/InstrProfData.inc | 2 +-
.../llvm-profdata/binary-ids-padding.test | 2 +-
...alformed-not-space-for-another-header.test | 2 +-
.../malformed-num-counters-zero.test | 2 +-
offload/test/offloading/gpupgo/pgo2.c | 26 +++++++++++++++++++
6 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index bcd88b30d050d..633fdb9661162 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -308,6 +308,7 @@ COMPILER_RT_VISIBILITY int lprofWriteDataImpl(
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init;
#include "profile/InstrProfData.inc"
}
+ Header.Version = Version;
/* On WIN64, label differences are truncated 32-bit values. Truncate
* CountersDelta to match. */
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index d51b58386f168..2cdfea9a579a4 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
#define INSTR_PROF_DATA_DEFINED
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
+INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
diff --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
index f31aa15bfe6c9..292c582b45c52 100644
--- a/llvm/test/tools/llvm-profdata/binary-ids-padding.test
+++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
index 44be2980bb2f2..705e5efaf5875 100644
--- a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
+++ b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
index 9af9d65a6bdba..157c13b926a7e 100644
--- a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
+++ b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
@@ -1,7 +1,7 @@
// Header
//
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
diff --git a/offload/test/offloading/gpupgo/pgo2.c b/offload/test/offloading/gpupgo/pgo2.c
index 35dae74087d92..b75b0beaffdec 100644
--- a/offload/test/offloading/gpupgo/pgo2.c
+++ b/offload/test/offloading/gpupgo/pgo2.c
@@ -26,6 +26,28 @@
// RUN: --check-prefix="LLVM-HOST"
// RUN: not test -e %target_triple.%basename_t.nogpu.profraw
+// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \
+// RUN: -Xarch_device -fprofile-instr-generate
+// RUN: env LLVM_PROFILE_FILE=%basename_t.hidf.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %basename_t.hidf.profraw | %fcheck-generic \
+// RUN: --check-prefix="LLVM-HOST"
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.%basename_t.hidf.profraw \
+// RUN: | %fcheck-generic --check-prefix="CLANG-DEV"
+
+// RUN: %libomptarget-compile-generic -Xarch_device -fprofile-generate \
+// RUN: -Xarch_host -fprofile-instr-generate
+// RUN: env LLVM_PROFILE_FILE=%basename_t.hfdi.profraw \
+// RUN: %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %basename_t.hfdi.profraw | %fcheck-generic \
+// RUN: --check-prefix="CLANG-HOST"
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN: %target_triple.%basename_t.hfdi.profraw \
+// RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE"
+
// REQUIRES: gpu
// REQUIRES: pgo
@@ -51,11 +73,13 @@ int main() {
// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-HOST: Counters: 2
// LLVM-HOST: Block counts: [0, 0]
+// LLVM-HOST: Instrumentation level: IR
// LLVM-DEVICE-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// LLVM-DEVICE: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-DEVICE: Counters: 3
// LLVM-DEVICE: Block counts: [10, 2, 1]
+// LLVM-DEVICE: Instrumentation level: IR
// CLANG-HOST-LABEL: main:
// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
@@ -68,9 +92,11 @@ int main() {
// CLANG-HOST: Counters: 2
// CLANG-HOST: Function count: 0
// CLANG-HOST: Block counts: [0]
+// CLANG-HOST: Instrumentation level: Front-end
// CLANG-DEV-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// CLANG-DEV: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-DEV: Counters: 2
// CLANG-DEV: Function count: 0
// CLANG-DEV: Block counts: [11]
+// CLANG-DEV: Instrumentation level: Front-end
>From afd16b0c88f96dc1fdec3a228c447558930b52a3 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 19 Mar 2025 12:05:12 -0500
Subject: [PATCH 5/5] Revert Version change in instrprofdata.inc
---
compiler-rt/include/profile/InstrProfData.inc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index d51b58386f168..2cdfea9a579a4 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
#define INSTR_PROF_DATA_DEFINED
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
-INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
+INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
More information about the llvm-commits
mailing list