[clang] [compiler-rt] [llvm] [PGO][AMDGPU] Add offload profiling with uniformity-aware optimization (PR #177665)

Ethan Luis McDonough via cfe-commits cfe-commits at lists.llvm.org
Sun Feb 1 23:23:21 PST 2026


================
@@ -0,0 +1,463 @@
+//===- InstrProfilingPlatformROCm.c - Profile data ROCm platform ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstrProfiling.h"
+#include "InstrProfilingInternal.h"
+#include "InstrProfilingPort.h"
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static int ProcessDeviceOffloadPrf(void *DeviceOffloadPrf, int TUIndex);
+
+static int IsVerboseMode() { /* Returns 1 if LLVM_PROFILE_VERBOSE is present in the environment (any value, even empty), else 0. */
+  static int IsVerbose = -1; /* -1 means the environment has not been queried yet. */
+  if (IsVerbose == -1)
+    IsVerbose = getenv("LLVM_PROFILE_VERBOSE") != NULL; /* Looked up on the first call only; the 0/1 result is cached for later calls. */
+  return IsVerbose;
+}
+
+/* -------------------------------------------------------------------------- */
+/*  Dynamic loading of HIP runtime symbols                                   */
+/* -------------------------------------------------------------------------- */
+
+typedef int (*hipMemcpyFromSymbolTy)(void *, const void *, size_t, size_t, int);
+typedef int (*hipGetSymbolAddressTy)(void **, const void *);
+typedef int (*hipMemcpyTy)(void *, void *, size_t, int);
+typedef int (*hipModuleGetGlobalTy)(void **, size_t *, void *, const char *);
+
+static hipMemcpyFromSymbolTy pHipMemcpyFromSymbol = NULL;
+static hipGetSymbolAddressTy pHipGetSymbolAddress = NULL;
+static hipMemcpyTy pHipMemcpy = NULL;
+static hipModuleGetGlobalTy pHipModuleGetGlobal = NULL;
+
+/* -------------------------------------------------------------------------- */
+/*  Device-to-host copies                                                     */
+/*  Keep HIP-only to avoid an HSA dependency.                                 */
+/* -------------------------------------------------------------------------- */
+
+static void EnsureHipLoaded(void) {
+  static int Initialized = 0;
+  if (Initialized)
+    return;
+  Initialized = 1;
+
+  void *Handle = dlopen("libamdhip64.so", RTLD_LAZY | RTLD_LOCAL);
----------------
EthanLuisMcDonough wrote:

I tried running the existing PGO offload tests with this build, but they fail because `-ldl` isn't included in the build command:

```
FAIL: libomptarget :: amdgcn-amd-amdhsa :: offloading/gpupgo/pgo_device_only.c (264 of 450)
******************** TEST 'libomptarget :: amdgcn-amd-amdhsa :: offloading/gpupgo/pgo_device_only.c' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
BUILD_DIR/./bin/clang -fopenmp    -I SOURCE_DIR/llvm-project/offload/test -I BUILD_DIR/runtimes/runtimes-bins/openmp/runtime/src -L BUILD_DIR/runtimes/runtimes-bins/offload -L BUILD_DIR/./lib -L BUILD_DIR/./lib -L BUILD_DIR/runtimes/runtimes-bins/openmp/runtime/src  -nogpulib -Wl,-rpath,BUILD_DIR/runtimes/runtimes-bins/offload -Wl,-rpath,BUILD_DIR/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,BUILD_DIR/./lib -Wl,-rpath,BUILD_DIR/./lib  -fopenmp-targets=amdgcn-amd-amdhsa SOURCE_DIR/llvm-project/offload/test/offloading/gpupgo/pgo_device_only.c -o BUILD_DIR/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo_device_only.c.tmp -Xoffload-linker -lompdevice -fcreate-profile      -Xarch_device -fprofile-generate
# executed command: BUILD_DIR/./bin/clang -fopenmp -I SOURCE_DIR/llvm-project/offload/test -I BUILD_DIR/runtimes/runtimes-bins/openmp/runtime/src -L BUILD_DIR/runtimes/runtimes-bins/offload -L BUILD_DIR/./lib -L BUILD_DIR/./lib -L BUILD_DIR/runtimes/runtimes-bins/openmp/runtime/src -nogpulib -Wl,-rpath,BUILD_DIR/runtimes/runtimes-bins/offload -Wl,-rpath,BUILD_DIR/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,BUILD_DIR/./lib -Wl,-rpath,BUILD_DIR/./lib -fopenmp-targets=amdgcn-amd-amdhsa SOURCE_DIR/llvm-project/offload/test/offloading/gpupgo/pgo_device_only.c -o BUILD_DIR/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo_device_only.c.tmp -Xoffload-linker -lompdevice -fcreate-profile -Xarch_device -fprofile-generate
# .---command stderr------------
# | ld.lld: error: undefined symbol: dlopen
# | >>> referenced by InstrProfilingPlatformROCm.c
# | >>>               InstrProfilingPlatformROCm.c.o:(EnsureHipLoaded) in archive BUILD_DIR/lib/clang/23/lib/x86_64-unknown-linux-gnu/libclang_rt.profile.a
# | 
# | ld.lld: error: undefined symbol: dlsym
# | >>> referenced by InstrProfilingPlatformROCm.c
# | >>>               InstrProfilingPlatformROCm.c.o:(EnsureHipLoaded) in archive BUILD_DIR/lib/clang/23/lib/x86_64-unknown-linux-gnu/libclang_rt.profile.a
# | >>> referenced by InstrProfilingPlatformROCm.c
# | >>>               InstrProfilingPlatformROCm.c.o:(EnsureHipLoaded) in archive BUILD_DIR/lib/clang/23/lib/x86_64-unknown-linux-gnu/libclang_rt.profile.a
# | >>> referenced by InstrProfilingPlatformROCm.c
# | >>>               InstrProfilingPlatformROCm.c.o:(EnsureHipLoaded) in archive BUILD_DIR/lib/clang/23/lib/x86_64-unknown-linux-gnu/libclang_rt.profile.a
# | >>> referenced 1 more times
# | 
# | ld.lld: error: undefined symbol: dlerror
# | >>> referenced by InstrProfilingPlatformROCm.c
# | >>>               InstrProfilingPlatformROCm.c.o:(EnsureHipLoaded) in archive BUILD_DIR/lib/clang/23/lib/x86_64-unknown-linux-gnu/libclang_rt.profile.a
# | BUILD_DIR/bin/clang-linker-wrapper: error: 'ld.lld' failed
# | clang: error: linker command failed with exit code 1 (use -v to see invocation)
# `-----------------------------
# error: command failed with exit status: 1

--

```

https://github.com/llvm/llvm-project/pull/177665


More information about the cfe-commits mailing list