[clang] [llvm] [OpenMP][clang] Register vtables on device for indirect calls runtime (PR #167011)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 10 09:55:59 PST 2025
https://github.com/Jason-VanBeusekom updated https://github.com/llvm/llvm-project/pull/167011
>From c2aea1ee2b34c1e1e88e3cad9cc141aab54d52e0 Mon Sep 17 00:00:00 2001
From: jason-van-beusekom <jason.van-beusekom at hpe.com>
Date: Fri, 7 Nov 2025 10:39:38 -0600
Subject: [PATCH 1/2] [OpenMP][clang] Register Vtables on device for indirect
calls - clang/llvm changes
- Register Vtable's on device during codegen
- Add support in OMPIRBuilder
- Add test cases for vtable codegen
Co-authored-by: Chi-Chun Chen <chichunchen844 at gmail.com>
Co-authored-by: Jeffery Sandoval <jeffrey.sandoval at hpe.com>
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 126 ++++++++++++++++++
clang/lib/CodeGen/CGOpenMPRuntime.h | 20 +++
clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 +
clang/lib/CodeGen/CGVTables.cpp | 6 +
clang/lib/CodeGen/CGVTables.h | 4 +
clang/lib/CodeGen/CodeGenModule.h | 3 +
.../target_vtable_codegen_container.cpp | 42 ++++++
.../OpenMP/target_vtable_codegen_explicit.cpp | 48 +++++++
...rget_vtable_codegen_implicit_namespace.cpp | 43 ++++++
...rget_vtable_codegen_memberexpr_codegen.cpp | 56 ++++++++
...arget_vtable_codegen_mult_inherritence.cpp | 46 +++++++
.../OpenMP/target_vtable_codegen_nested.cpp | 82 ++++++++++++
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 5 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 20 ++-
14 files changed, 501 insertions(+), 4 deletions(-)
create mode 100644 clang/test/OpenMP/target_vtable_codegen_container.cpp
create mode 100644 clang/test/OpenMP/target_vtable_codegen_explicit.cpp
create mode 100644 clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp
create mode 100644 clang/test/OpenMP/target_vtable_codegen_memberexpr_codegen.cpp
create mode 100644 clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp
create mode 100644 clang/test/OpenMP/target_vtable_codegen_nested.cpp
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a503aaf613e30..77adbc80af4e4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1771,12 +1771,126 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
}
+ // Register the indirect Vtable:
+ // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
+ // size field refers to the size of memory pointed to, not the size of
+ // the pointer symbol itself (which is implicitly the size of a pointer).
OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
llvm::GlobalValue::WeakODRLinkage);
}
+void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
+ const VarDecl *VD) {
+ // TODO: add logic to avoid duplicate vtable registrations per
+ // translation unit; though for external linkage, this should no
+ // longer be an issue - or at least we can avoid the issue by
+ // checking for an existing offloading entry. But, perhaps the
+ // better approach is to defer emission of the vtables and offload
+ // entries until later (by tracking a list of items that need to be
+ // emitted).
+
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+ // Generate a new externally visible global to point to the
+ // internally visible vtable. Doing this allows us to keep the
+ // visibility and linkage of the associated vtable unchanged while
+ // allowing the runtime to access its value. The externally
+ // visible global var needs to be emitted with a unique mangled
+ // name that won't conflict with similarly named (internal)
+ // vtables in other translation units.
+
+ // Register vtable with source location of dynamic object in map
+ // clause.
+ llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
+ CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(),
+ VTable->getName());
+
+ llvm::GlobalVariable *Addr = VTable;
+ size_t PointerSize = CGM.getDataLayout().getPointerSize();
+ SmallString<128> AddrName;
+ OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
+ AddrName.append("addr");
+
+ if (CGM.getLangOpts().OpenMPIsTargetDevice) {
+ Addr = new llvm::GlobalVariable(
+ CGM.getModule(), VTable->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
+ AddrName,
+ /*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal,
+ CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
+ Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+ }
+ OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
+ AddrName, VTable,
+ CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
+ llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
+ llvm::GlobalValue::WeakODRLinkage);
+}
+
+void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM,
+ CXXRecordDecl *CXXRecord,
+ const VarDecl *VD) {
+ // Register C++ VTable to OpenMP Offload Entry if it's a new
+ // CXXRecordDecl.
+ if (CXXRecord && CXXRecord->isDynamicClass() &&
+ !CGM.getOpenMPRuntime().VTableDeclMap.contains(CXXRecord)) {
+ CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
+ CGM.EmitVTable(CXXRecord);
+ CodeGenVTables VTables = CGM.getVTables();
+ llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
+ if (VTablesAddr)
+ CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
+ // Emit VTable for all the fields containing dynamic CXXRecord
+ for (const FieldDecl *Field : CXXRecord->fields()) {
+ if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
+ emitAndRegisterVTable(CGM, RecordDecl, VD);
+ }
+ // Emit VTable for all dynamic parent class
+ for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
+ if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
+ emitAndRegisterVTable(CGM, BaseDecl, VD);
+ }
+ }
+};
+
+void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
+ // Register VTable by scanning through the map clause of OpenMP target region.
+ // Get CXXRecordDecl and VarDecl from Expr.
+ auto GetVTableDecl = [](const Expr *E) {
+ QualType VDTy = E->getType();
+ CXXRecordDecl *CXXRecord = nullptr;
+ if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
+ VDTy = RefType->getPointeeType();
+ if (VDTy->isPointerType())
+ CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
+ else
+ CXXRecord = VDTy->getAsCXXRecordDecl();
+
+ const VarDecl *VD = nullptr;
+ if (auto *DRE = dyn_cast<DeclRefExpr>(E))
+ VD = cast<VarDecl>(DRE->getDecl());
+ else if (auto *MRE = dyn_cast<MemberExpr>(E)) {
+ if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())) {
+ if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl())) {
+ VD = BaseVD;
+ }
+ }
+ }
+ return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
+ };
+ // Collect VTable from OpenMP map clause.
+ for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
+ for (const auto *E : C->varlist()) {
+ auto DeclPair = GetVTableDecl(E);
+ // Ensure VD is not null
+ if (DeclPair.second)
+ emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
+ }
+ }
+}
+
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
QualType VarType,
StringRef Name) {
@@ -6249,6 +6363,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
}
}
+ registerVTable(D);
}
/// Checks if the expression is constant or does not have non-trivial function
@@ -9955,6 +10070,17 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
if (!S)
return;
+ // Register vtable from device for target data and target directives.
+ // Add this block here since scanForTargetRegionsFunctions ignores
+ // target data by checking if S is a executable directive (target).
+ if (auto *E = dyn_cast<OMPExecutableDirective>(S);
+ E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) {
+ // Don't need to check if it's device compile
+ // since scanForTargetRegionsFunctions currently only called
+ // in device compilation.
+ registerVTable(*E);
+ }
+
// Codegen OMP target directives that offload compute to the device.
bool RequiresDeviceCodegen =
isa<OMPExecutableDirective>(S) &&
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index eb04eceee236c..7f8a81d4090e2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -605,6 +605,9 @@ class CGOpenMPRuntime {
LValue PosLVal, const OMPTaskDataTy::DependData &Data,
Address DependenciesArray);
+ /// Keep track of VTable Declarations so we don't register duplicate VTable.
+ llvm::SmallDenseMap<CXXRecordDecl *, const VarDecl *> VTableDeclMap;
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
@@ -1111,6 +1114,23 @@ class CGOpenMPRuntime {
virtual void emitDeclareTargetFunction(const FunctionDecl *FD,
llvm::GlobalValue *GV);
+ /// Register VTable to OpenMP offload entry.
+ /// \param VTable VTable of the C++ class.
+ /// \param RD C++ class decl.
+ virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
+ const VarDecl *VD);
+ /// Emit code for registering vtable by scanning through map clause
+ /// in OpenMP target region.
+ /// \param D OpenMP target directive.
+ virtual void registerVTable(const OMPExecutableDirective &D);
+
+ /// Emit and register VTable for the C++ class in OpenMP offload entry.
+ /// \param CXXRecord C++ class decl.
+ /// \param VD Variable decl which holds VTable.
+ virtual void emitAndRegisterVTable(CodeGenModule &CGM,
+ CXXRecordDecl *CXXRecord,
+ const VarDecl *VD);
+
/// Creates artificial threadprivate variable with name \p Name and type \p
/// VarType.
/// \param VarType Type of the artificial threadprivate variable.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index d72cd8fbfd608..0b88f1dc5f0ea 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7617,6 +7617,10 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause(
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) {
+ // Emit vtable only from host for target data directive.
+ if (!CGM.getLangOpts().OpenMPIsTargetDevice)
+ CGM.getOpenMPRuntime().registerVTable(S);
+
CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
/*SeparateBeginEndCalls=*/true);
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index e14e883a55ac5..de4a67db313ea 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -38,6 +38,12 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy,
/*DontDefer=*/true, /*IsThunk=*/true);
}
+llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) {
+ llvm::GlobalVariable *VTable =
+ CGM.getCXXABI().getAddrOfVTable(RD, CharUnits());
+ return VTable;
+}
+
static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
llvm::Function *ThunkFn, bool ForVTable,
GlobalDecl GD) {
diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h
index 5c45e355fb145..37458eee02e34 100644
--- a/clang/lib/CodeGen/CGVTables.h
+++ b/clang/lib/CodeGen/CGVTables.h
@@ -122,6 +122,10 @@ class CodeGenVTables {
llvm::GlobalVariable::LinkageTypes Linkage,
const CXXRecordDecl *RD);
+ /// GetAddrOfVTable - Get the address of the VTable for the given record
+ /// decl.
+ llvm::GlobalVariable *GetAddrOfVTable(const CXXRecordDecl *RD);
+
/// EmitThunks - Emit the associated thunks for the given global decl.
void EmitThunks(GlobalDecl GD);
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 3971b296b3f80..49dcba4b7618b 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -754,6 +754,9 @@ class CodeGenModule : public CodeGenTypeCache {
// i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32)
llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr;
+ // Store indirect CallExprs that are within an omp target region
+ llvm::SmallPtrSet<const CallExpr *, 16> OMPTargetCalls;
+
InstrProfStats &getPGOStats() { return PGOStats; }
llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }
diff --git a/clang/test/OpenMP/target_vtable_codegen_container.cpp b/clang/test/OpenMP/target_vtable_codegen_container.cpp
new file mode 100644
index 0000000000000..9fd4c6b736163
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_container.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-DAG: @_ZTV7Derived
+// CHECK-DAG: @_ZTV4Base
+template <typename T>
+class Container {
+private:
+T value;
+public:
+Container() : value() {}
+Container(T val) : value(val) {}
+
+T getValue() const { return value; }
+
+void setValue(T val) { value = val; }
+};
+
+class Base {
+public:
+ virtual void foo() {}
+};
+class Derived : public Base {};
+
+class Test {
+public:
+ Container<Derived> v;
+};
+
+int main() {
+ Test test;
+ Derived d;
+ test.v.setValue(d);
+
+// Make sure we emit VTable for type indirectly (template specialized type)
+#pragma omp target map(test)
+ {
+ test.v.getValue().foo();
+ }
+ return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_explicit.cpp b/clang/test/OpenMP/target_vtable_codegen_explicit.cpp
new file mode 100644
index 0000000000000..001ed8fdd9cd7
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_explicit.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+// Make sure both host and device compilation emit vtable for Dervied
+// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any
+// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any
+// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any
+// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any
+// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any
+// CHECK-DAG: $_ZN4BaseD2Ev = comdat any
+// CHECK-DAG: $_ZTV7Derived = comdat any
+class Base {
+public:
+
+ virtual ~Base() = default;
+
+ virtual void BaseA(int a) { }
+};
+
+// CHECK: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] }
+class Derived : public Base {
+public:
+
+ ~Derived() override = default;
+
+ void BaseA(int a) override { x = a; }
+
+ virtual void DerivedB() { }
+private:
+ int x;
+};
+
+int main() {
+
+ Derived d;
+ Base& c = d;
+ int a = 50;
+ // Should emit vtable for Derived since d is added to map clause
+#pragma omp target data map (to: d, a)
+ {
+ #pragma omp target map(d)
+ {
+ c.BaseA(a);
+ }
+ }
+ return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp
new file mode 100644
index 0000000000000..364c55cd07985
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+namespace {
+
+// Make sure both host and device compilation emit vtable for Dervied
+// CHECK-DAG: @_ZTVN12_GLOBAL__N_17DerivedE
+// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD1Ev
+// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD0Ev
+// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived5BaseAEi
+// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived8DerivedBEv
+class Base {
+public:
+ virtual ~Base() = default;
+ virtual void BaseA(int a) { }
+};
+
+class Derived : public Base {
+public:
+ ~Derived() override = default;
+ void BaseA(int a) override { x = a; }
+ virtual void DerivedB() { }
+private:
+ int x;
+};
+
+};
+
+int main() {
+
+ Derived d;
+ Base& c = d;
+ int a = 50;
+#pragma omp target data map (to: d, a)
+ {
+ #pragma omp target
+ {
+ c.BaseA(a);
+ }
+ }
+ return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_memberexpr_codegen.cpp b/clang/test/OpenMP/target_vtable_codegen_memberexpr_codegen.cpp
new file mode 100644
index 0000000000000..0535ba1dec741
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_memberexpr_codegen.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+
+// CHECK-DAG: $_ZN4Base5BaseAEi = comdat any
+// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any
+// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any
+// CHECK-DAG: $_ZN4BaseD1Ev = comdat any
+// CHECK-DAG: $_ZN4BaseD0Ev = comdat any
+// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any
+// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any
+// CHECK-DAG: $_ZN4BaseD2Ev = comdat any
+// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any
+// CHECK-DAG: $_ZTV4Base = comdat any
+// CHECK-DAG: $_ZTV7Derived = comdat any
+class Base {
+public:
+
+ virtual ~Base() = default;
+
+ virtual void BaseA(int a) { }
+};
+
+class Derived : public Base {
+public:
+
+ ~Derived() override = default;
+
+ void BaseA(int a) override { x = a; }
+
+ virtual void DerivedB() { }
+private:
+ int x;
+};
+
+struct VirtualContainer {
+ Base baseObj;
+ Derived derivedObj;
+ Base *basePtr;
+};
+
+int main() {
+ VirtualContainer container;
+ container.basePtr = &container.derivedObj;
+ int a = 50;
+#pragma omp target map(container.baseObj, container.derivedObj, \
+ container.basePtr[ : 1])
+ {
+ container.baseObj.BaseA(a);
+ container.derivedObj.BaseA(a);
+ container.derivedObj.DerivedB();
+ container.basePtr->BaseA(a);
+ }
+ return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp
new file mode 100644
index 0000000000000..3069a4994a479
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-DAG: @_ZTV6Base_1
+// CHECK-DAG: @_ZTV7Derived
+// CHECK-DAG: @_ZTV6Base_2
+#pragma omp begin declare target
+
+class Base_1 {
+public:
+ virtual void foo() { }
+ virtual void bar() { }
+};
+
+class Base_2 {
+public:
+ virtual void foo() { }
+ virtual void bar() { }
+};
+
+class Derived : public Base_1, public Base_2 {
+public:
+ virtual void foo() override { }
+ virtual void bar() override { }
+};
+
+#pragma omp end declare target
+
+int main() {
+ Base_1 base;
+ Derived derived;
+
+ // Make sure we emit vtable for parent class (Base_1 and Base_2)
+#pragma omp target data map(derived)
+ {
+ Base_1 *p1 = &derived;
+
+#pragma omp target
+ {
+ p1->foo();
+ p1->bar();
+ }
+ }
+ return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_nested.cpp b/clang/test/OpenMP/target_vtable_codegen_nested.cpp
new file mode 100644
index 0000000000000..1ece83d60ac58
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_nested.cpp
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-DAG: @_ZTV3Car
+// CHECK-DAG: @_ZTV6Engine
+// CHECK-DAG: @_ZTV6Wheels
+// CHECK-DAG: @_ZTV7Vehicle
+// CHECK-DAG: @_ZTV5Brand
+class Engine {
+public:
+ Engine(const char *type) : type(type) {}
+ virtual ~Engine() {}
+
+ virtual void start() const { }
+
+protected:
+ const char *type;
+};
+
+class Wheels {
+public:
+ Wheels(int count) : count(count) {}
+ virtual ~Wheels() {}
+
+ virtual void roll() const { }
+
+protected:
+ int count;
+};
+
+class Vehicle {
+public:
+ Vehicle(int speed) : speed(speed) {}
+ virtual ~Vehicle() {}
+
+ virtual void move() const { }
+
+protected:
+ int speed;
+};
+
+class Brand {
+public:
+ Brand(const char *brandName) : brandName(brandName) {}
+ virtual ~Brand() {}
+
+ void showBrand() const { }
+
+protected:
+ const char *brandName;
+};
+
+class Car : public Vehicle, public Brand {
+public:
+ Car(const char *brand, int speed, const char *engineType, int wheelCount)
+ : Vehicle(speed), Brand(brand), engine(engineType), wheels(wheelCount) {}
+
+ void move() const override { }
+
+ void drive() const {
+ showBrand();
+ engine.start();
+ wheels.roll();
+ move();
+ }
+
+private:
+ Engine engine;
+ Wheels wheels;
+};
+
+int main() {
+ Car myActualCar("Ford", 100, "Hybrid", 4);
+
+ // Make sure we emit VTable for dynamic class as field
+#pragma omp target map(myActualCar)
+ {
+ myActualCar.drive();
+ }
+ return 0;
+}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index f43ef932e965a..cc0d4c89f9b9f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -390,6 +390,8 @@ class OffloadEntriesInfoManager {
OMPTargetGlobalVarEntryIndirect = 0x8,
/// Mark the entry as a register requires global.
OMPTargetGlobalRegisterRequires = 0x10,
+ /// Mark the entry as a declare target indirect vtable.
+ OMPTargetGlobalVarEntryIndirectVTable = 0x20,
};
/// Kind of device clause for declare target variables
@@ -2666,7 +2668,8 @@ class OpenMPIRBuilder {
enum EmitMetadataErrorKind {
EMIT_MD_TARGET_REGION_ERROR,
EMIT_MD_DECLARE_TARGET_ERROR,
- EMIT_MD_GLOBAL_VAR_LINK_ERROR
+ EMIT_MD_GLOBAL_VAR_LINK_ERROR,
+ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
};
/// Callback function type
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 220eee3cb8b08..236cfab3f031c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -10246,6 +10246,13 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
continue;
}
break;
+ case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect:
+ case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable:
+ if (!CE->getAddress()) {
+ ErrorFn(EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR, E.second);
+ continue;
+ }
+ break;
default:
break;
}
@@ -10255,12 +10262,17 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
// entry. Indirect variables are handled separately on the device.
if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress()))
if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) &&
- Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+ (Flags !=
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect ||
+ Flags != OffloadEntriesInfoManager::
+ OMPTargetGlobalVarEntryIndirectVTable))
continue;
// Indirect globals need to use a special name that doesn't match the name
// of the associated host global.
- if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+ if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect ||
+ Flags ==
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable)
createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
Flags, CE->getLinkage(), CE->getVarName());
else
@@ -10689,7 +10701,9 @@ void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
}
return;
}
- if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+ if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect ||
+ Flags ==
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable)
OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
Addr, VarSize, Flags, Linkage,
VarName.str());
>From cbe939d5cfc7fd248d2b15aa517df9f5738744ae Mon Sep 17 00:00:00 2001
From: jason-van-beusekom <jason.van-beusekom at hpe.com>
Date: Fri, 7 Nov 2025 10:40:53 -0600
Subject: [PATCH 2/2] [OpenMP][offload] Register Vtables runtime support for
indirect calls
- Modify PluginInterface to register Vtables to indirect call table
---
offload/include/omptarget.h | 2 +
offload/libomptarget/PluginManager.cpp | 7 +-
offload/libomptarget/device.cpp | 63 ++++++++---
.../test/api/omp_indirect_call_table_manual.c | 107 ++++++++++++++++++
4 files changed, 164 insertions(+), 15 deletions(-)
create mode 100644 offload/test/api/omp_indirect_call_table_manual.c
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 8fd722bb15022..3317441f04eba 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -94,6 +94,8 @@ enum OpenMPOffloadingDeclareTargetFlags {
OMP_DECLARE_TARGET_INDIRECT = 0x08,
/// This is an entry corresponding to a requirement to be registered.
OMP_REGISTER_REQUIRES = 0x10,
+ /// Mark the entry global as being an indirect vtable.
+ OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20,
};
enum TargetAllocTy : int32_t {
diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp
index b57a2f815cba6..6fc330b92f0f5 100644
--- a/offload/libomptarget/PluginManager.cpp
+++ b/offload/libomptarget/PluginManager.cpp
@@ -434,7 +434,8 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
llvm::offloading::EntryTy DeviceEntry = Entry;
if (Entry.Size) {
- if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
+ if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) &&
+ Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
&DeviceEntry.Address) != OFFLOAD_SUCCESS)
REPORT("Failed to load symbol %s\n", Entry.SymbolName);
@@ -443,7 +444,9 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
// the device to point to the memory on the host.
if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
(PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) {
- if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address,
+ if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) &&
+ !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
+ Device.RTL->data_submit(DeviceId, DeviceEntry.Address,
Entry.Address,
Entry.Size) != OFFLOAD_SUCCESS)
REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName);
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index 71423ae0c94d9..d5436bde47ba5 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -112,21 +112,58 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image,
llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable;
for (const auto &Entry : Entries) {
if (Entry.Kind != llvm::object::OffloadKind::OFK_OpenMP ||
- Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT))
+ Entry.Size == 0 ||
+ (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
+ !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE)))
continue;
- assert(Entry.Size == sizeof(void *) && "Global not a function pointer?");
- auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
-
- void *Ptr;
- if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
- return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
- "failed to load %s", Entry.SymbolName);
-
- HstPtr = Entry.Address;
- if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
- return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
- "failed to load %s", Entry.SymbolName);
+ size_t PtrSize = sizeof(void *);
+ if (Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) {
+ // This is a VTable entry, the current entry is the first index of the
+ // VTable and Entry.Size is the total size of the VTable. Unlike the
+ // indirect function case below, the Global is not of size Entry.Size and
+ // is instead of size PtrSize (sizeof(void*)).
+ void *Vtable;
+ void *res;
+ if (Device.RTL->get_global(Binary, PtrSize, Entry.SymbolName, &Vtable))
+ return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+ "failed to load %s", Entry.SymbolName);
+
+ // HstPtr = Entry.Address;
+ if (Device.retrieveData(&res, Vtable, PtrSize, AsyncInfo))
+ return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+ "failed to load %s", Entry.SymbolName);
+ if (Device.synchronize(AsyncInfo))
+ return error::createOffloadError(
+ error::ErrorCode::INVALID_BINARY,
+ "failed to synchronize after retrieving %s", Entry.SymbolName);
+ // Calculate and emplace entire Vtable from first Vtable byte
+ for (uint64_t i = 0; i < Entry.Size / PtrSize; ++i) {
+ auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
+ HstPtr = reinterpret_cast<void *>(
+ reinterpret_cast<uintptr_t>(Entry.Address) + i * PtrSize);
+ DevPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(res) +
+ i * PtrSize);
+ }
+ } else {
+ // Indirect function case: Entry.Size should equal PtrSize since we're
+ // dealing with a single function pointer (not a VTable)
+ assert(Entry.Size == PtrSize && "Global not a function pointer?");
+ auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
+ void *Ptr;
+ if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
+ return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+ "failed to load %s", Entry.SymbolName);
+
+ HstPtr = Entry.Address;
+ if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
+ return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+ "failed to load %s", Entry.SymbolName);
+ }
+ if (Device.synchronize(AsyncInfo))
+ return error::createOffloadError(
+ error::ErrorCode::INVALID_BINARY,
+ "failed to synchronize after retrieving %s", Entry.SymbolName);
}
// If we do not have any indirect globals we exit early.
diff --git a/offload/test/api/omp_indirect_call_table_manual.c b/offload/test/api/omp_indirect_call_table_manual.c
new file mode 100644
index 0000000000000..e958d47d69dad
--- /dev/null
+++ b/offload/test/api/omp_indirect_call_table_manual.c
@@ -0,0 +1,107 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <assert.h>
+#include <omp.h>
+#include <stdio.h>
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+
+typedef struct {
+ uint64_t Reserved;
+ uint16_t Version;
+ uint16_t Kind; // OpenMP==1
+ uint32_t Flags;
+ void *Address;
+ char *SymbolName;
+ uint64_t Size;
+ uint64_t Data;
+ void *AuxAddr;
+} __tgt_offload_entry;
+
+enum OpenMPOffloadingDeclareTargetFlags {
+ /// Mark the entry global as having a 'link' attribute.
+ OMP_DECLARE_TARGET_LINK = 0x01,
+ /// Mark the entry global as being an indirectly callable function.
+ OMP_DECLARE_TARGET_INDIRECT = 0x08,
+ /// This is an entry corresponding to a requirement to be registered.
+ OMP_REGISTER_REQUIRES = 0x10,
+ /// Mark the entry global as being an indirect vtable.
+ OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20,
+};
+
+#pragma omp begin declare variant match(device = {kind(gpu)})
+// Provided by the runtime.
+void *__llvm_omp_indirect_call_lookup(void *host_ptr);
+#pragma omp declare target to(__llvm_omp_indirect_call_lookup) \
+ device_type(nohost)
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {kind(cpu)})
+// We assume unified addressing on the CPU target.
+void *__llvm_omp_indirect_call_lookup(void *host_ptr) { return host_ptr; }
+#pragma omp end declare variant
+
+#pragma omp begin declare target
+void foo(int *i) { *i += 1; }
+void bar(int *i) { *i += 10; }
+void baz(int *i) { *i += 100; }
+#pragma omp end declare target
+
+typedef void (*fptr_t)(int *i);
+
+// Dispatch Table - declare separately on host and device to avoid
+// registering with the library; this also allows us to use separate
+// names, which is convenient for debugging. This dispatchTable is
+// intended to mimic what Clang emits for C++ vtables.
+fptr_t dispatchTable[] = {foo, bar, baz};
+#pragma omp begin declare target device_type(nohost)
+fptr_t GPUdispatchTable[] = {foo, bar, baz};
+fptr_t *GPUdispatchTablePtr = GPUdispatchTable;
+#pragma omp end declare target
+
+// Define "manual" OpenMP offload entries, where we emit Clang
+// offloading entry structure definitions in the appropriate ELF
+// section. This allows us to emulate the offloading entries that Clang would
+// normally emit for us
+
+__attribute__((weak, section("llvm_offload_entries"), aligned(8)))
+const __tgt_offload_entry __offloading_entry[] = {{
+ 0ULL, // Reserved
+ 1, // Version
+ 1, // Kind
+ OMP_DECLARE_TARGET_INDIRECT_VTABLE, // Flags
+ &dispatchTable, // Address
+ "GPUdispatchTablePtr", // SymbolName
+ (size_t)(sizeof(dispatchTable)), // Size
+ 0ULL, // Data
+ NULL // AuxAddr
+}};
+
+// Mimic how Clang emits vtable pointers for C++ classes
+typedef struct {
+ fptr_t *dispatchPtr;
+} myClass;
+
+// ---------------------------------------------------------------------------
+int main() {
+ myClass obj_foo = {dispatchTable + 0};
+ myClass obj_bar = {dispatchTable + 1};
+ myClass obj_baz = {dispatchTable + 2};
+ int aaa = 0;
+
+#pragma omp target map(aaa) map(to : obj_foo, obj_bar, obj_baz)
+ {
+ // Lookup
+ fptr_t *foo_ptr = __llvm_omp_indirect_call_lookup(obj_foo.dispatchPtr);
+ fptr_t *bar_ptr = __llvm_omp_indirect_call_lookup(obj_bar.dispatchPtr);
+ fptr_t *baz_ptr = __llvm_omp_indirect_call_lookup(obj_baz.dispatchPtr);
+ foo_ptr[0](&aaa);
+ bar_ptr[0](&aaa);
+ baz_ptr[0](&aaa);
+ }
+
+ assert(aaa == 111);
+ // CHECK: PASS
+ printf("PASS\n");
+ return 0;
+}
More information about the cfe-commits
mailing list