[llvm] [OpenMP][clang] Register vtables on device for indirect calls runtime (PR #167011)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 26 08:31:09 PST 2025


https://github.com/Jason-VanBeusekom updated https://github.com/llvm/llvm-project/pull/167011

>From d4adcdd4bf35eb18dd7b9f1b9e6d02a5c77d0b98 Mon Sep 17 00:00:00 2001
From: jason-van-beusekom <jason.van-beusekom at hpe.com>
Date: Fri, 7 Nov 2025 10:40:53 -0600
Subject: [PATCH] [OpenMP][offload] Register Vtables runtime support for
 indirect calls

Key Changes:

	-Introduced a new flag OMP_DECLARE_TARGET_INDIRECT_VTABLE
	 to mark VTable registrations
	-Modified setupIndirectCallTable to support both VTable
	 entries and indirect function pointers

This is commit (1/3) to support indirect call and virtual function
mapping to the device:
Register Vtable PR (2/3): https://github.com/llvm/llvm-project/pull/159856
Codegen / _llvm_omp_indirect_call_lookup PR (3/3): https://github.com/llvm/llvm-project/pull/159857
---
 offload/include/omptarget.h                   |   2 +
 offload/libomptarget/PluginManager.cpp        |  14 ++-
 offload/libomptarget/device.cpp               |  63 ++++++++---
 .../test/api/omp_indirect_call_table_manual.c | 107 ++++++++++++++++++
 4 files changed, 167 insertions(+), 19 deletions(-)
 create mode 100644 offload/test/api/omp_indirect_call_table_manual.c

diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 8fd722bb15022..3317441f04eba 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -94,6 +94,8 @@ enum OpenMPOffloadingDeclareTargetFlags {
   OMP_DECLARE_TARGET_INDIRECT = 0x08,
   /// This is an entry corresponding to a requirement to be registered.
   OMP_REGISTER_REQUIRES = 0x10,
+  /// Mark the entry global as being an indirect vtable.
+  OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20,
 };
 
 enum TargetAllocTy : int32_t {
diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp
index b57a2f815cba6..61189f578d8e4 100644
--- a/offload/libomptarget/PluginManager.cpp
+++ b/offload/libomptarget/PluginManager.cpp
@@ -434,20 +434,22 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
 
         llvm::offloading::EntryTy DeviceEntry = Entry;
         if (Entry.Size) {
-          if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
-                                     &DeviceEntry.Address) != OFFLOAD_SUCCESS)
-            REPORT("Failed to load symbol %s\n", Entry.SymbolName);
+          if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE))
+            if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
+                                       &DeviceEntry.Address) != OFFLOAD_SUCCESS)
+              REPORT("Failed to load symbol %s\n", Entry.SymbolName);
 
           // If unified memory is active, the corresponding global is a device
           // reference to the host global. We need to initialize the pointer on
           // the device to point to the memory on the host.
-          if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
-              (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) {
+          if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) &&
+              !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
+              ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
+               (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)))
             if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address,
                                         Entry.Address,
                                         Entry.Size) != OFFLOAD_SUCCESS)
               REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName);
-          }
         } else if (Entry.Address) {
           if (Device.RTL->get_function(Binary, Entry.SymbolName,
                                        &DeviceEntry.Address) != OFFLOAD_SUCCESS)
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index 71423ae0c94d9..d5436bde47ba5 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -112,21 +112,58 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image,
   llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable;
   for (const auto &Entry : Entries) {
     if (Entry.Kind != llvm::object::OffloadKind::OFK_OpenMP ||
-        Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT))
+        Entry.Size == 0 ||
+        (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
+         !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE)))
       continue;
 
-    assert(Entry.Size == sizeof(void *) && "Global not a function pointer?");
-    auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
-
-    void *Ptr;
-    if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
-      return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
-                                       "failed to load %s", Entry.SymbolName);
-
-    HstPtr = Entry.Address;
-    if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
-      return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
-                                       "failed to load %s", Entry.SymbolName);
+    size_t PtrSize = sizeof(void *);
+    if (Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) {
+      // This is a VTable entry, the current entry is the first index of the
+      // VTable and Entry.Size is the total size of the VTable. Unlike the
+      // indirect function case below, the Global is not of size Entry.Size and
+      // is instead of size PtrSize (sizeof(void*)).
+      void *Vtable;
+      void *res;
+      if (Device.RTL->get_global(Binary, PtrSize, Entry.SymbolName, &Vtable))
+        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+                                         "failed to load %s", Entry.SymbolName);
+
+      // HstPtr = Entry.Address;
+      if (Device.retrieveData(&res, Vtable, PtrSize, AsyncInfo))
+        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+                                         "failed to load %s", Entry.SymbolName);
+      if (Device.synchronize(AsyncInfo))
+        return error::createOffloadError(
+            error::ErrorCode::INVALID_BINARY,
+            "failed to synchronize after retrieving %s", Entry.SymbolName);
+      // Calculate and emplace entire Vtable from first Vtable byte
+      for (uint64_t i = 0; i < Entry.Size / PtrSize; ++i) {
+        auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
+        HstPtr = reinterpret_cast<void *>(
+            reinterpret_cast<uintptr_t>(Entry.Address) + i * PtrSize);
+        DevPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(res) +
+                                          i * PtrSize);
+      }
+    } else {
+      // Indirect function case: Entry.Size should equal PtrSize since we're
+      // dealing with a single function pointer (not a VTable)
+      assert(Entry.Size == PtrSize && "Global not a function pointer?");
+      auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
+      void *Ptr;
+      if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
+        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+                                         "failed to load %s", Entry.SymbolName);
+
+      HstPtr = Entry.Address;
+      if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
+        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+                                         "failed to load %s", Entry.SymbolName);
+    }
+    if (Device.synchronize(AsyncInfo))
+      return error::createOffloadError(
+          error::ErrorCode::INVALID_BINARY,
+          "failed to synchronize after retrieving %s", Entry.SymbolName);
   }
 
   // If we do not have any indirect globals we exit early.
diff --git a/offload/test/api/omp_indirect_call_table_manual.c b/offload/test/api/omp_indirect_call_table_manual.c
new file mode 100644
index 0000000000000..e958d47d69dad
--- /dev/null
+++ b/offload/test/api/omp_indirect_call_table_manual.c
@@ -0,0 +1,107 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <assert.h>
+#include <omp.h>
+#include <stdio.h>
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+
+typedef struct {
+  uint64_t Reserved;
+  uint16_t Version;
+  uint16_t Kind; // OpenMP==1
+  uint32_t Flags;
+  void *Address;
+  char *SymbolName;
+  uint64_t Size;
+  uint64_t Data;
+  void *AuxAddr;
+} __tgt_offload_entry;
+
+enum OpenMPOffloadingDeclareTargetFlags {
+  /// Mark the entry global as having a 'link' attribute.
+  OMP_DECLARE_TARGET_LINK = 0x01,
+  /// Mark the entry global as being an indirectly callable function.
+  OMP_DECLARE_TARGET_INDIRECT = 0x08,
+  /// This is an entry corresponding to a requirement to be registered.
+  OMP_REGISTER_REQUIRES = 0x10,
+  /// Mark the entry global as being an indirect vtable.
+  OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20,
+};
+
+#pragma omp begin declare variant match(device = {kind(gpu)})
+// Provided by the runtime.
+void *__llvm_omp_indirect_call_lookup(void *host_ptr);
+#pragma omp declare target to(__llvm_omp_indirect_call_lookup)                 \
+    device_type(nohost)
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {kind(cpu)})
+// We assume unified addressing on the CPU target.
+void *__llvm_omp_indirect_call_lookup(void *host_ptr) { return host_ptr; }
+#pragma omp end declare variant
+
+#pragma omp begin declare target
+void foo(int *i) { *i += 1; }
+void bar(int *i) { *i += 10; }
+void baz(int *i) { *i += 100; }
+#pragma omp end declare target
+
+typedef void (*fptr_t)(int *i);
+
+// Dispatch Table - declare separately on host and device to avoid
+// registering with the library; this also allows us to use separate
+// names, which is convenient for debugging.  This dispatchTable is
+// intended to mimic what Clang emits for C++ vtables.
+fptr_t dispatchTable[] = {foo, bar, baz};
+#pragma omp begin declare target device_type(nohost)
+fptr_t GPUdispatchTable[] = {foo, bar, baz};
+fptr_t *GPUdispatchTablePtr = GPUdispatchTable;
+#pragma omp end declare target
+
+// Define "manual" OpenMP offload entries, where we emit Clang
+// offloading entry structure definitions in the appropriate ELF
+// section.  This allows us to  emulate the offloading entries that Clang would
+// normally emit for us
+
+__attribute__((weak, section("llvm_offload_entries"), aligned(8)))
+const __tgt_offload_entry __offloading_entry[] = {{
+    0ULL,                               // Reserved
+    1,                                  // Version
+    1,                                  // Kind
+    OMP_DECLARE_TARGET_INDIRECT_VTABLE, // Flags
+    &dispatchTable,                     // Address
+    "GPUdispatchTablePtr",              // SymbolName
+    (size_t)(sizeof(dispatchTable)),    // Size
+    0ULL,                               // Data
+    NULL                                // AuxAddr
+}};
+
+// Mimic how Clang emits vtable pointers for C++ classes
+typedef struct {
+  fptr_t *dispatchPtr;
+} myClass;
+
+// ---------------------------------------------------------------------------
+int main() {
+  myClass obj_foo = {dispatchTable + 0};
+  myClass obj_bar = {dispatchTable + 1};
+  myClass obj_baz = {dispatchTable + 2};
+  int aaa = 0;
+
+#pragma omp target map(aaa) map(to : obj_foo, obj_bar, obj_baz)
+  {
+    // Lookup
+    fptr_t *foo_ptr = __llvm_omp_indirect_call_lookup(obj_foo.dispatchPtr);
+    fptr_t *bar_ptr = __llvm_omp_indirect_call_lookup(obj_bar.dispatchPtr);
+    fptr_t *baz_ptr = __llvm_omp_indirect_call_lookup(obj_baz.dispatchPtr);
+    foo_ptr[0](&aaa);
+    bar_ptr[0](&aaa);
+    baz_ptr[0](&aaa);
+  }
+
+  assert(aaa == 111);
+  // CHECK: PASS
+  printf("PASS\n");
+  return 0;
+}



More information about the llvm-commits mailing list