[clang] [llvm] [OpenMP][clang] Indirect and Virtual function call mapping from host to device (PR #159857)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 19 15:02:38 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: None (Jason-VanBeusekom)
<details>
<summary>Changes</summary>
This adds a feature to insert CPU-to-GPU function pointer translation at GPU
call-sites
How it works:
Search Clang AST within "omp target" region for CallExprs.
Store them in the set CrayOMPTargetCalls (kept by CodeGenModule).
When generating code for function calls that use function pointers,
check whether the associated CallExpr is in the set CrayOMPTargetCalls.
If YES, insert a call to devicertl function __llvm_omp_indirect_call_lookup.
This is the second out of Two PR's to implement this, commit 5247c1f2ad7e1ae05cc92daca7979d7c9d838cb2 is not a part of the Pull request and is handled in: https://github.com/llvm/llvm-project/pull/159856
---
Patch is 89.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159857.diff
21 Files Affected:
- (modified) clang/lib/CodeGen/CGExpr.cpp (+20)
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+158)
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.h (+13)
- (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+4)
- (modified) clang/lib/CodeGen/CGVTables.cpp (+6)
- (modified) clang/lib/CodeGen/CGVTables.h (+4)
- (modified) clang/lib/CodeGen/CodeGenModule.h (+3)
- (modified) clang/lib/CodeGen/ItaniumCXXABI.cpp (+18)
- (added) clang/test/OpenMP/target_vtable_codegen.cpp (+280)
- (added) clang/test/OpenMP/target_vtable_omp_indirect_call_lookup.cpp (+51)
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+4-1)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+16-3)
- (modified) offload/include/omptarget.h (+2)
- (modified) offload/libomptarget/PluginManager.cpp (+5-2)
- (modified) offload/libomptarget/device.cpp (+32-5)
- (added) offload/test/api/omp_indirect_func_basic.c (+97)
- (added) offload/test/api/omp_indirect_func_struct.c (+213)
- (added) offload/test/api/omp_virtual_func.cpp (+161)
- (added) offload/test/api/omp_virtual_func_multiple_inheritance_01.cpp (+416)
- (added) offload/test/api/omp_virtual_func_multiple_inheritance_02.cpp (+428)
- (added) offload/test/api/omp_virtual_func_reference.cpp (+80)
``````````diff
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e6e4947882544..cc4c21a719f4c 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -6583,6 +6583,26 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
Address(Handle, Handle->getType(), CGM.getPointerAlign()));
Callee.setFunctionPointer(Stub);
}
+
+ // Check whether the associated CallExpr is in the set OMPTargetCalls.
+ // If YES, insert a call to devicertl function __llvm_omp_indirect_call_lookup
+ //
+ // This is used for the indriect function Case, virtual function case is
+ // handled in ItaniumCXXABI.cpp
+ if (getLangOpts().OpenMPIsTargetDevice && CGM.OMPTargetCalls.contains(E)) {
+ auto *PtrTy = CGM.VoidPtrTy;
+ llvm::Type *RtlFnArgs[] = {PtrTy};
+ llvm::FunctionCallee DeviceRtlFn = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(PtrTy, RtlFnArgs, false),
+ "__llvm_omp_indirect_call_lookup");
+ llvm::Value *Func = Callee.getFunctionPointer();
+ llvm::Type *BackupTy = Func->getType();
+ Func = Builder.CreatePointerBitCastOrAddrSpaceCast(Func, PtrTy);
+ Func = EmitRuntimeCall(DeviceRtlFn, {Func});
+ Func = Builder.CreatePointerBitCastOrAddrSpaceCast(Func, BackupTy);
+ Callee.setFunctionPointer(Func);
+ }
+
llvm::CallBase *LocalCallOrInvoke = nullptr;
RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &LocalCallOrInvoke,
E == MustTailCall, E->getExprLoc());
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a503aaf613e30..ac1d467affc00 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -24,6 +24,7 @@
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
@@ -1771,12 +1772,126 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
}
+ // Register the indirect Vtable:
+ // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
+ // size field refers to the size of memory pointed to, not the size of
+ // the pointer symbol itself (which is implicitly the size of a pointer).
OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
llvm::GlobalValue::WeakODRLinkage);
}
+void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
+ const VarDecl *VD) {
+ // TODO: add logic to avoid duplicate vtable registrations per
+ // translation unit; though for external linkage, this should no
+ // longer be an issue - or at least we can avoid the issue by
+ // checking for an existing offloading entry. But, perhaps the
+ // better approach is to defer emission of the vtables and offload
+ // entries until later (by tracking a list of items that need to be
+ // emitted).
+
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+ // Generate a new externally visible global to point to the
+ // internally visible vtable. Doing this allows us to keep the
+ // visibility and linkage of the associated vtable unchanged while
+ // allowing the runtime to access its value. The externally
+ // visible global var needs to be emitted with a unique mangled
+ // name that won't conflict with similarly named (internal)
+ // vtables in other translation units.
+
+ // Register vtable with source location of dynamic object in map
+ // clause.
+ llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
+ CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(),
+ VTable->getName());
+
+ llvm::GlobalVariable *Addr = VTable;
+ size_t PointerSize = CGM.getDataLayout().getPointerSize();
+ SmallString<128> AddrName;
+ OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
+ AddrName.append("addr");
+
+ if (CGM.getLangOpts().OpenMPIsTargetDevice) {
+ Addr = new llvm::GlobalVariable(
+ CGM.getModule(), VTable->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
+ AddrName,
+ /*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal,
+ CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
+ Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+ }
+ OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
+ AddrName, VTable,
+ CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
+ llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
+ llvm::GlobalValue::WeakODRLinkage);
+}
+
+// Register VTable by scanning through the map clause of OpenMP target region.
+void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
+ // Get CXXRecordDecl and VarDecl from Expr.
+ auto getVTableDecl = [](const Expr *E) {
+ QualType VDTy = E->getType();
+ CXXRecordDecl *CXXRecord = nullptr;
+ if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
+ VDTy = RefType->getPointeeType();
+ if (VDTy->isPointerType())
+ CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
+ else
+ CXXRecord = VDTy->getAsCXXRecordDecl();
+
+ const VarDecl *VD = nullptr;
+ if (auto *DRE = dyn_cast<DeclRefExpr>(E))
+ VD = cast<VarDecl>(DRE->getDecl());
+ return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
+ };
+
+ // Emit VTable and register the VTable to OpenMP offload entry recursively.
+ std::function<void(CodeGenModule &, CXXRecordDecl *, const VarDecl *)>
+ emitAndRegisterVTable = [&emitAndRegisterVTable](CodeGenModule &CGM,
+ CXXRecordDecl *CXXRecord,
+ const VarDecl *VD) {
+ // Register C++ VTable to OpenMP Offload Entry if it's a new
+ // CXXRecordDecl.
+ if (CXXRecord && CXXRecord->isDynamicClass() &&
+ CGM.getOpenMPRuntime().VTableDeclMap.find(CXXRecord) ==
+ CGM.getOpenMPRuntime().VTableDeclMap.end()) {
+ CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
+ CGM.EmitVTable(CXXRecord);
+ auto VTables = CGM.getVTables();
+ auto *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
+ if (VTablesAddr) {
+ CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
+ }
+ // Emit VTable for all the fields containing dynamic CXXRecord
+ for (const FieldDecl *Field : CXXRecord->fields()) {
+ if (CXXRecordDecl *RecordDecl =
+ Field->getType()->getAsCXXRecordDecl()) {
+ emitAndRegisterVTable(CGM, RecordDecl, VD);
+ }
+ }
+ // Emit VTable for all dynamic parent class
+ for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
+ if (CXXRecordDecl *BaseDecl =
+ Base.getType()->getAsCXXRecordDecl()) {
+ emitAndRegisterVTable(CGM, BaseDecl, VD);
+ }
+ }
+ }
+ };
+
+ // Collect VTable from OpenMP map clause.
+ for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
+ for (const auto *E : C->varlist()) {
+ auto DeclPair = getVTableDecl(E);
+ emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
+ }
+ }
+}
+
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
QualType VarType,
StringRef Name) {
@@ -6221,6 +6336,25 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ class OMPTargetCallCollector
+ : public RecursiveASTVisitor<OMPTargetCallCollector> {
+ public:
+ OMPTargetCallCollector(CodeGenFunction &CGF,
+ llvm::SmallPtrSetImpl<const CallExpr *> &TargetCalls)
+ : CGF(CGF), TargetCalls(TargetCalls) {}
+
+ bool VisitCallExpr(CallExpr *CE) {
+ if (!CE->getDirectCallee()) {
+ TargetCalls.insert(CE);
+ }
+ return true;
+ }
+
+ private:
+ CodeGenFunction &CGF;
+ llvm::SmallPtrSetImpl<const CallExpr *> &TargetCalls;
+ };
+
llvm::TargetRegionEntryInfo EntryInfo =
getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
@@ -6229,6 +6363,16 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
[&CGF, &D, &CodeGen](StringRef EntryFnName) {
const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
+ // Search Clang AST within "omp target" region for CallExprs.
+ // Store them in the set OMPTargetCalls (kept by CodeGenModule).
+ // This is used for the translation of indirect function calls.
+ const auto &LangOpts = CGF.getLangOpts();
+ if (LangOpts.OpenMPIsTargetDevice) {
+ // Search AST for target "CallExpr"s of "OMPTargetAutoLookup".
+ OMPTargetCallCollector Visitor(CGF, CGF.CGM.OMPTargetCalls);
+ Visitor.TraverseStmt(const_cast<Stmt*>(CS.getCapturedStmt()));
+ }
+
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
@@ -6249,6 +6393,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
}
}
+ registerVTable(D);
}
/// Checks if the expression is constant or does not have non-trivial function
@@ -9955,6 +10100,19 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
if (!S)
return;
+ // Register vtable from device for target data and target directives.
+ // Add this block here since scanForTargetRegionsFunctions ignores
+ // target data by checking if S is a executable directive (target).
+ if (isa<OMPExecutableDirective>(S) &&
+ isOpenMPTargetDataManagementDirective(
+ cast<OMPExecutableDirective>(S)->getDirectiveKind())) {
+ auto &E = *cast<OMPExecutableDirective>(S);
+ // Don't need to check if it's device compile
+ // since scanForTargetRegionsFunctions currently only called
+ // in device compilation.
+ registerVTable(E);
+ }
+
// Codegen OMP target directives that offload compute to the device.
bool RequiresDeviceCodegen =
isa<OMPExecutableDirective>(S) &&
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index eb04eceee236c..0f7937ae95c06 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -605,6 +605,9 @@ class CGOpenMPRuntime {
LValue PosLVal, const OMPTaskDataTy::DependData &Data,
Address DependenciesArray);
+ /// Keep track of VTable Declarations so we don't register duplicate VTable.
+ llvm::DenseMap<CXXRecordDecl*, const VarDecl*> VTableDeclMap;
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
@@ -1111,6 +1114,16 @@ class CGOpenMPRuntime {
virtual void emitDeclareTargetFunction(const FunctionDecl *FD,
llvm::GlobalValue *GV);
+ /// Register VTable to OpenMP offload entry.
+ /// \param VTable VTable of the C++ class.
+ /// \param RD C++ class decl.
+ virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
+ const VarDecl *VD);
+ /// Emit code for registering vtable by scanning through map clause
+ /// in OpenMP target region.
+ /// \param D OpenMP target directive.
+ virtual void registerVTable(const OMPExecutableDirective &D);
+
/// Creates artificial threadprivate variable with name \p Name and type \p
/// VarType.
/// \param VarType Type of the artificial threadprivate variable.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index d72cd8fbfd608..582dd0f3ade65 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7617,6 +7617,10 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause(
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) {
+ // Emit vtable only from host for target data directive.
+ if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
+ CGM.getOpenMPRuntime().registerVTable(S);
+ }
CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
/*SeparateBeginEndCalls=*/true);
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index e14e883a55ac5..de4a67db313ea 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -38,6 +38,12 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy,
/*DontDefer=*/true, /*IsThunk=*/true);
}
+llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) {
+ llvm::GlobalVariable *VTable =
+ CGM.getCXXABI().getAddrOfVTable(RD, CharUnits());
+ return VTable;
+}
+
static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
llvm::Function *ThunkFn, bool ForVTable,
GlobalDecl GD) {
diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h
index 5c45e355fb145..37458eee02e34 100644
--- a/clang/lib/CodeGen/CGVTables.h
+++ b/clang/lib/CodeGen/CGVTables.h
@@ -122,6 +122,10 @@ class CodeGenVTables {
llvm::GlobalVariable::LinkageTypes Linkage,
const CXXRecordDecl *RD);
+ /// GetAddrOfVTable - Get the address of the VTable for the given record
+ /// decl.
+ llvm::GlobalVariable *GetAddrOfVTable(const CXXRecordDecl *RD);
+
/// EmitThunks - Emit the associated thunks for the given global decl.
void EmitThunks(GlobalDecl GD);
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 3971b296b3f80..4ace1abcb5246 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -754,6 +754,9 @@ class CodeGenModule : public CodeGenTypeCache {
// i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32)
llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr;
+ // Store indirect CallExprs that are within an omp target region
+ llvm::SmallPtrSet<const CallExpr *, 16> OMPTargetCalls;
+
InstrProfStats &getPGOStats() { return PGOStats; }
llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 7dc2eaf1e9f75..1dbfe23cef127 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2261,6 +2261,24 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
llvm::Type *PtrTy = CGM.GlobalsInt8PtrTy;
auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl());
llvm::Value *VTable = CGF.GetVTablePtr(This, PtrTy, MethodDecl->getParent());
+ /*
+ * For the translate of virtual functions we need to map the (potential) host vtable
+ * to the device vtable. This is done by calling the runtime function
+ * __llvm_omp_indirect_call_lookup.
+ */
+ if (CGM.getLangOpts().OpenMPIsTargetDevice) {
+ auto *NewPtrTy = CGM.VoidPtrTy;
+ llvm::Type *RtlFnArgs[] = {NewPtrTy};
+ llvm::FunctionCallee DeviceRtlFn = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(NewPtrTy, RtlFnArgs, false),
+ "__llvm_omp_indirect_call_lookup");
+ auto *BackupTy = VTable->getType();
+ // Need to convert to generic address space
+ VTable = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(VTable, NewPtrTy);
+ VTable = CGF.EmitRuntimeCall(DeviceRtlFn, {VTable});
+ // convert to original address space
+ VTable = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(VTable, BackupTy);
+ }
uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD);
llvm::Value *VFunc, *VTableSlotPtr = nullptr;
diff --git a/clang/test/OpenMP/target_vtable_codegen.cpp b/clang/test/OpenMP/target_vtable_codegen.cpp
new file mode 100644
index 0000000000000..276cef4eb8801
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen.cpp
@@ -0,0 +1,280 @@
+///==========================================================================///
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK1
+//
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK2
+//
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK3
+//
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK4
+//
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ | FileCheck %s --check-prefix=CK5
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+#ifdef CK1
+
+// Make sure both host and device compilation emit vtable for Dervied
+// CK1-DAG: $_ZN7DerivedD1Ev = comdat any
+// CK1-DAG: $_ZN7DerivedD0Ev = comdat any
+// CK1-DAG: $_ZN7Derived5BaseAEi = comdat any
+// CK1-DAG: $_ZN7Derived8DerivedBEv = comdat any
+// CK1-DAG: $_ZN7DerivedD2Ev = comdat any
+// CK1-DAG: $_ZN4BaseD2Ev = comdat any
+// CK1-DAG: $_ZTV7Derived = comdat any
+class Base {
+public:
+ virtual ~Base() = default;
+ virtual void BaseA(int a) { }
+};
+
+// CK1: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] }
+class Derived : public Base {
+public:
+ ~Derived(...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/159857
More information about the cfe-commits
mailing list