[clang] 5cf2a37 - [HIP] Emit kernel symbol
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 1 13:32:09 PST 2021
Author: Yaxun (Sam) Liu
Date: 2021-03-01T16:31:40-05:00
New Revision: 5cf2a37f1255700d4da9d5f45e82bdfff09aee8c
URL: https://github.com/llvm/llvm-project/commit/5cf2a37f1255700d4da9d5f45e82bdfff09aee8c
DIFF: https://github.com/llvm/llvm-project/commit/5cf2a37f1255700d4da9d5f45e82bdfff09aee8c.diff
LOG: [HIP] Emit kernel symbol
Currently clang uses stub function to launch kernel. This is inconvenient
to interop with C++ programs since the stub function has different name
as kernel, which is required by ROCm debugger.
This patch emits a variable symbol which has the same name as the kernel
and uses it to register and launch the kernel. This allows C++ program to
launch a kernel by using the original kernel name.
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D86376
Added:
clang/test/CodeGenCUDA/cxx-call-kernel.cpp
Modified:
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/CodeGen/CGCUDARuntime.h
clang/lib/CodeGen/CGExpr.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGenCUDA/Inputs/cuda.h
clang/test/CodeGenCUDA/kernel-dbg-info.cu
clang/test/CodeGenCUDA/kernel-stub-name.cu
clang/test/CodeGenCUDA/unnamed-types.cu
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 57e3b151bfd4..3a311ab395e4 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -42,12 +42,18 @@ class CGNVCUDARuntime : public CGCUDARuntime {
llvm::LLVMContext &Context;
/// Convenience reference to the current module
llvm::Module &TheModule;
- /// Keeps track of kernel launch stubs emitted in this module
+ /// Keeps track of kernel launch stubs and handles emitted in this module
struct KernelInfo {
- llvm::Function *Kernel;
+ llvm::Function *Kernel; // stub function to help launch kernel
const Decl *D;
};
llvm::SmallVector<KernelInfo, 16> EmittedKernels;
+ // Map a device stub function to a symbol for identifying kernel in host code.
+ // For CUDA, the symbol for identifying the kernel is the same as the device
+ // stub function. For HIP, they are
diff erent.
+ llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
+ // Map a kernel handle to the kernel stub.
+ llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
struct VarInfo {
llvm::GlobalVariable *Var;
const VarDecl *D;
@@ -154,6 +160,12 @@ class CGNVCUDARuntime : public CGCUDARuntime {
public:
CGNVCUDARuntime(CodeGenModule &CGM);
+ llvm::GlobalValue *getKernelHandle(llvm::Function *F, GlobalDecl GD) override;
+ llvm::Function *getKernelStub(llvm::GlobalValue *Handle) override {
+ auto Loc = KernelStubs.find(Handle);
+ assert(Loc != KernelStubs.end());
+ return Loc->second;
+ }
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
void handleVarRegistration(const VarDecl *VD,
llvm::GlobalVariable &Var) override;
@@ -272,6 +284,10 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
FunctionArgList &Args) {
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
+ if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) {
+ GV->setLinkage(CGF.CurFn->getLinkage());
+ GV->setInitializer(CGF.CurFn);
+ }
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
CudaFeature::CUDA_USES_NEW_LAUNCH) ||
(CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
@@ -350,7 +366,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
ShmemSize.getPointer(), Stream.getPointer()});
// Emit the call to cudaLaunch
- llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy);
+ llvm::Value *Kernel =
+ CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
CallArgList LaunchKernelArgs;
LaunchKernelArgs.add(RValue::get(Kernel),
cudaLaunchKernelFD->getParamDecl(0)->getType());
@@ -405,7 +422,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
// Emit the call to cudaLaunch
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
- llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
+ llvm::Value *Arg =
+ CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);
@@ -499,7 +517,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
- Builder.CreateBitCast(I.Kernel, VoidPtrTy),
+ Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
KernelName,
KernelName,
llvm::ConstantInt::get(IntTy, -1),
@@ -1070,3 +1088,28 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
}
return makeModuleCtorFunction();
}
+
+llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
+ GlobalDecl GD) {
+ auto Loc = KernelHandles.find(F);
+ if (Loc != KernelHandles.end())
+ return Loc->second;
+
+ if (!CGM.getLangOpts().HIP) {
+ KernelHandles[F] = F;
+ KernelStubs[F] = F;
+ return F;
+ }
+
+ auto *Var = new llvm::GlobalVariable(
+ TheModule, F->getType(), /*isConstant=*/true, F->getLinkage(),
+ /*Initializer=*/nullptr,
+ CGM.getMangledName(
+ GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel)));
+ Var->setAlignment(CGM.getPointerAlign().getAsAlign());
+ Var->setDSOLocal(F->isDSOLocal());
+ Var->setVisibility(F->getVisibility());
+ KernelHandles[F] = Var;
+ KernelStubs[Var] = F;
+ return Var;
+}
diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index 2f4b7ab1dc6d..1c119dc77fd4 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
+#include "clang/AST/GlobalDecl.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalValue.h"
@@ -94,6 +95,13 @@ class CGCUDARuntime {
/// compilation is for host.
virtual std::string getDeviceSideName(const NamedDecl *ND) = 0;
+ /// Get kernel handle by stub function.
+ virtual llvm::GlobalValue *getKernelHandle(llvm::Function *Stub,
+ GlobalDecl GD) = 0;
+
+ /// Get kernel stub by kernel handle.
+ virtual llvm::Function *getKernelStub(llvm::GlobalValue *Handle) = 0;
+
/// Adjust linkage of shadow variables in host compilation.
virtual void
internalizeDeviceSideVar(const VarDecl *D,
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 05c553f22d60..d57dd7f49d50 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CGCall.h"
#include "CGCleanup.h"
@@ -4871,8 +4872,12 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
return CGCallee::forBuiltin(builtinID, FD);
}
- llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
- return CGCallee::forDirect(calleePtr, GD);
+ llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
+ if (CGF.CGM.getLangOpts().CUDA && !CGF.CGM.getLangOpts().CUDAIsDevice &&
+ FD->hasAttr<CUDAGlobalAttr>())
+ CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub(
+ cast<llvm::GlobalValue>(CalleePtr->stripPointerCasts()));
+ return CGCallee::forDirect(CalleePtr, GD);
}
CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
@@ -5266,6 +5271,19 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
Callee.setFunctionPointer(CalleePtr);
}
+ // HIP function pointer contains kernel handle when it is used in triple
+ // chevron. The kernel stub needs to be loaded from kernel handle and used
+ // as callee.
+ if (CGM.getLangOpts().HIP && !CGM.getLangOpts().CUDAIsDevice &&
+ isa<CUDAKernelCallExpr>(E) &&
+ (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) {
+ llvm::Value *Handle = Callee.getFunctionPointer();
+ Handle->dump();
+ auto *Cast =
+ Builder.CreateBitCast(Handle, Handle->getType()->getPointerTo());
+ auto *Stub = Builder.CreateLoad(Address(Cast, CGM.getPointerAlign()));
+ Callee.setFunctionPointer(Stub);
+ }
llvm::CallBase *CallOrInvoke = nullptr;
RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &CallOrInvoke,
E->getExprLoc());
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 0d499a564039..750439dd6844 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3571,9 +3571,19 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD,
}
StringRef MangledName = getMangledName(GD);
- return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer,
- /*IsThunk=*/false, llvm::AttributeList(),
- IsForDefinition);
+ auto *F = GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer,
+ /*IsThunk=*/false, llvm::AttributeList(),
+ IsForDefinition);
+ // Returns kernel handle for HIP kernel stub function.
+ if (LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
+ cast<FunctionDecl>(GD.getDecl())->hasAttr<CUDAGlobalAttr>()) {
+ auto *Handle = getCUDARuntime().getKernelHandle(
+ cast<llvm::Function>(F->stripPointerCasts()), GD);
+ if (IsForDefinition)
+ return F;
+ return llvm::ConstantExpr::getBitCast(Handle, Ty->getPointerTo());
+ }
+ return F;
}
static const FunctionDecl *
diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h
index daa6328c9499..af395b3b97bb 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -2,6 +2,7 @@
#include <stddef.h>
+#if __HIP__ || __CUDA__
#define __constant__ __attribute__((constant))
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
@@ -11,13 +12,22 @@
#define __managed__ __attribute__((managed))
#endif
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#else
+#define __constant__
+#define __device__
+#define __global__
+#define __host__
+#define __shared__
+#define __managed__
+#define __launch_bounds__(...)
+#endif
struct dim3 {
unsigned x, y, z;
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};
-#ifdef __HIP__
+#if __HIP__ || HIP_PLATFORM
typedef struct hipStream *hipStream_t;
typedef enum hipError {} hipError_t;
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
diff --git a/clang/test/CodeGenCUDA/cxx-call-kernel.cpp b/clang/test/CodeGenCUDA/cxx-call-kernel.cpp
new file mode 100644
index 000000000000..ae58dcd348ce
--- /dev/null
+++ b/clang/test/CodeGenCUDA/cxx-call-kernel.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -x hip -emit-llvm-bc %s -o %t.hip.bc
+// RUN: %clang_cc1 -mlink-bitcode-file %t.hip.bc -DHIP_PLATFORM -emit-llvm \
+// RUN: %s -o - | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: @_Z2g1i = constant void (i32)* @_Z17__device_stub__g1i, align 8
+#if __HIP__
+__global__ void g1(int x) {}
+#else
+extern void g1(int x);
+
+// CHECK: call i32 @hipLaunchKernel{{.*}}@_Z2g1i
+void test() {
+ hipLaunchKernel((void*)g1, 1, 1, nullptr, 0, 0);
+}
+
+// CHECK: __hipRegisterFunction{{.*}}@_Z2g1i
+#endif
diff --git a/clang/test/CodeGenCUDA/kernel-dbg-info.cu b/clang/test/CodeGenCUDA/kernel-dbg-info.cu
index 4aa1f353cc5e..7e8522a371d7 100644
--- a/clang/test/CodeGenCUDA/kernel-dbg-info.cu
+++ b/clang/test/CodeGenCUDA/kernel-dbg-info.cu
@@ -30,6 +30,9 @@ extern "C" __global__ void ckernel(int *a) {
*a = 1;
}
+// Kernel symbol for launching kernel.
+// CHECK: @[[SYM:ckernel]] = constant void (i32*)* @__device_stub__ckernel, align 8
+
// Device side kernel names
// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
@@ -40,7 +43,7 @@ extern "C" __global__ void ckernel(int *a) {
// Make sure there is no !dbg between function attributes and '{'
// CHECK: define{{.*}} void @[[CSTUB:__device_stub__ckernel]]{{.*}} #{{[0-9]+}} {
// CHECK-NOT: call {{.*}}@hipLaunchByPtr{{.*}}!dbg
-// CHECK: call {{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call {{.*}}@hipLaunchByPtr{{.*}}@[[SYM]]
// CHECK-NOT: ret {{.*}}!dbg
// CHECK-LABEL: define {{.*}}@_Z8hostfuncPi{{.*}}!dbg
diff --git a/clang/test/CodeGenCUDA/kernel-stub-name.cu b/clang/test/CodeGenCUDA/kernel-stub-name.cu
index b66a7ba190c0..0c504b612ea7 100644
--- a/clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ b/clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -2,10 +2,17 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK
+// RUN: | FileCheck %s
#include "Inputs/cuda.h"
+// Kernel handles
+
+// CHECK: @[[HCKERN:ckernel]] = constant void ()* @__device_stub__ckernel, align 8
+// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @_ZN2ns23__device_stub__nskernelEv, align 8
+// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @_Z25__device_stub__kernelfuncIiEvv, align 8
+// CHECK: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8
+
extern "C" __global__ void ckernel() {}
namespace ns {
@@ -17,6 +24,11 @@ __global__ void kernelfunc() {}
__global__ void kernel_decl();
+void (*kernel_ptr)();
+void *void_ptr;
+
+void launch(void *kern);
+
// Device side kernel names
// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
@@ -26,16 +38,20 @@ __global__ void kernel_decl();
// Non-template kernel stub functions
// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
+
-// CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
+// Check kernel stub is used for triple chevron
+
+// CHECK-LABEL: define{{.*}}@_Z4fun1v()
// CHECK: call void @[[CSTUB]]()
// CHECK: call void @[[NSSTUB]]()
// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]()
// CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
-void hostfunc(void) {
+
+void fun1(void) {
ckernel<<<1, 1>>>();
ns::nskernel<<<1, 1>>>();
kernelfunc<int><<<1, 1>>>();
@@ -45,11 +61,69 @@ void hostfunc(void) {
// Template kernel stub functions
// CHECK: define{{.*}}@[[TSTUB]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
+
+// Check declaration of stub function for external kernel.
// CHECK: declare{{.*}}@[[DSTUB]]
+// Check kernel handle is used for passing the kernel as a function pointer
+
+// CHECK-LABEL: define{{.*}}@_Z4fun2v()
+// CHECK: call void @_Z6launchPv({{.*}}[[HCKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HNSKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HTKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HDKERN]]
+void fun2() {
+ launch((void *)ckernel);
+ launch((void *)ns::nskernel);
+ launch((void *)kernelfunc<int>);
+ launch((void *)kernel_decl);
+}
+
+// Check kernel handle is used for assigning a kernel to a function pointer
+
+// CHECK-LABEL: define{{.*}}@_Z4fun3v()
+// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
+// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
+// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
+// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
+void fun3() {
+ kernel_ptr = ckernel;
+ kernel_ptr = &ckernel;
+ void_ptr = (void *)ckernel;
+ void_ptr = (void *)&ckernel;
+}
+
+// Check kernel stub is loaded from kernel handle when function pointer is
+// used with triple chevron
+
+// CHECK-LABEL: define{{.*}}@_Z4fun4v()
+// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr
+// CHECK: call i32 @_Z16hipConfigureCall4dim3S_mP9hipStream
+// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8
+// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to void ()**
+// CHECK: %[[STUB:.*]] = load void ()*, void ()** %[[CAST]], align 8
+// CHECK: call void %[[STUB]]()
+void fun4() {
+ kernel_ptr = ckernel;
+ kernel_ptr<<<1,1>>>();
+}
+
+// Check kernel handle is passed to a function
+
+// CHECK-LABEL: define{{.*}}@_Z4fun5v()
+// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr
+// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8
+// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to i8*
+// CHECK: call void @_Z6launchPv(i8* %[[CAST]])
+void fun5() {
+ kernel_ptr = ckernel;
+ launch((void *)kernel_ptr);
+}
+
// CHECK-LABEL: define{{.*}}@__hip_register_globals
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]]
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]]
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
+// CHECK-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@[[HDKERN]]{{.*}}@[[DKERN]]
diff --git a/clang/test/CodeGenCUDA/unnamed-types.cu b/clang/test/CodeGenCUDA/unnamed-types.cu
index f598117d969d..b59d5f448dde 100644
--- a/clang/test/CodeGenCUDA/unnamed-types.cu
+++ b/clang/test/CodeGenCUDA/unnamed-types.cu
@@ -54,7 +54,7 @@ void f1(float *p) {
[] __device__ (float x) { return x + 5.f; });
}
// HOST: @__hip_register_globals
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
// MSVC: __hipRegisterFunction{{.*}}@"??$k0 at V<lambda_1>@?0???R1?0??f1@@YAXPEAM at Z@QEBA at 0@Z@@@YAXPEAMV<lambda_1>@?0???R0?0??f1@@YAX0 at Z@QEBA at 0@Z@@Z{{.*}}@0
// MSVC: __hipRegisterFunction{{.*}}@"??$k1 at V<lambda_2>@?0??f1@@YAXPEAM at Z@V<lambda_3>@?0??2 at YAX0@Z at V<lambda_4>@?0??2 at YAX0@Z@@@YAXPEAMV<lambda_2>@?0??f1@@YAX0 at Z@V<lambda_3>@?0??1 at YAX0@Z at V<lambda_4>@?0??1 at YAX0@Z@@Z{{.*}}@1
More information about the cfe-commits
mailing list