r354004 - [CUDA][HIP] Use device side kernel and variable names when registering them

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 13 18:00:09 PST 2019


Author: yaxunl
Date: Wed Feb 13 18:00:09 2019
New Revision: 354004

URL: http://llvm.org/viewvc/llvm-project?rev=354004&view=rev
Log:
[CUDA][HIP] Use device side kernel and variable names when registering them

__hipRegisterFunction and __hipRegisterVar need to accept device side kernel and variable names
so that HIP runtime can associate kernel stub functions in host code with kernel symbols in fat binaries,
and associate shadow variables in host code with device variables in fat binaries.

Currently, clang assumes kernel functions and device variables have the same name as the kernel
stub functions and shadow variables. However, when host is compiled in windows with MSVC C++
ABI and device is compiled with Itanium C++ ABI (e.g. AMDGPU), kernels and device symbols in fat
binary are mangled differently than host.

This patch gets the device side kernel and variable name by mangling them in the mangle context
of aux target.

Differential Revision: https://reviews.llvm.org/D58163

Modified:
    cfe/trunk/include/clang/AST/ASTContext.h
    cfe/trunk/lib/AST/ASTContext.cpp
    cfe/trunk/lib/CodeGen/CGCUDANV.cpp
    cfe/trunk/lib/CodeGen/CGCUDARuntime.h
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/include/clang/AST/ASTContext.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/ASTContext.h?rev=354004&r1=354003&r2=354004&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/ASTContext.h (original)
+++ cfe/trunk/include/clang/AST/ASTContext.h Wed Feb 13 18:00:09 2019
@@ -2237,7 +2237,8 @@ public:
 
   VTableContextBase *getVTableContext();
 
-  MangleContext *createMangleContext();
+  /// If \p T is null pointer, assume the target in ASTContext.
+  MangleContext *createMangleContext(const TargetInfo *T = nullptr);
 
   void DeepCollectObjCIvars(const ObjCInterfaceDecl *OI, bool leafClass,
                             SmallVectorImpl<const ObjCIvarDecl*> &Ivars) const;

Modified: cfe/trunk/lib/AST/ASTContext.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTContext.cpp?rev=354004&r1=354003&r2=354004&view=diff
==============================================================================
--- cfe/trunk/lib/AST/ASTContext.cpp (original)
+++ cfe/trunk/lib/AST/ASTContext.cpp Wed Feb 13 18:00:09 2019
@@ -9981,8 +9981,10 @@ VTableContextBase *ASTContext::getVTable
   return VTContext.get();
 }
 
-MangleContext *ASTContext::createMangleContext() {
-  switch (Target->getCXXABI().getKind()) {
+MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
+  if (!T)
+    T = Target;
+  switch (T->getCXXABI().getKind()) {
   case TargetCXXABI::GenericAArch64:
   case TargetCXXABI::GenericItanium:
   case TargetCXXABI::GenericARM:

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354004&r1=354003&r2=354004&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Wed Feb 13 18:00:09 2019
@@ -42,14 +42,25 @@ private:
   /// Convenience reference to the current module
   llvm::Module &TheModule;
   /// Keeps track of kernel launch stubs emitted in this module
-  llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
-  llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars;
+  struct KernelInfo {
+    llvm::Function *Kernel;
+    const Decl *D;
+  };
+  llvm::SmallVector<KernelInfo, 16> EmittedKernels;
+  struct VarInfo {
+    llvm::GlobalVariable *Var;
+    const VarDecl *D;
+    unsigned Flag;
+  };
+  llvm::SmallVector<VarInfo, 16> DeviceVars;
   /// Keeps track of variable containing handle of GPU binary. Populated by
   /// ModuleCtorFunction() and used to create corresponding cleanup calls in
   /// ModuleDtorFunction()
   llvm::GlobalVariable *GpuBinaryHandle = nullptr;
   /// Whether we generate relocatable device code.
   bool RelocatableDeviceCode;
+  /// Mangle context for device.
+  std::unique_ptr<MangleContext> DeviceMC;
 
   llvm::FunctionCallee getSetupArgumentFn() const;
   llvm::FunctionCallee getLaunchFn() const;
@@ -106,13 +117,15 @@ private:
 
   void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
   void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
+  std::string getDeviceSideName(const Decl *ND);
 
 public:
   CGNVCUDARuntime(CodeGenModule &CGM);
 
   void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
-  void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override {
-    DeviceVars.push_back(std::make_pair(&Var, Flags));
+  void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
+                         unsigned Flags) override {
+    DeviceVars.push_back({&Var, VD, Flags});
   }
 
   /// Creates module constructor function
@@ -138,7 +151,9 @@ CGNVCUDARuntime::addUnderscoredPrefixToN
 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
     : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
       TheModule(CGM.getModule()),
-      RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) {
+      RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
+      DeviceMC(CGM.getContext().createMangleContext(
+          CGM.getContext().getAuxTargetInfo())) {
   CodeGen::CodeGenTypes &Types = CGM.getTypes();
   ASTContext &Ctx = CGM.getContext();
 
@@ -187,9 +202,26 @@ llvm::FunctionType *CGNVCUDARuntime::get
   return llvm::FunctionType::get(VoidTy, Params, false);
 }
 
+std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) {
+  auto *ND = cast<const NamedDecl>(D);
+  std::string DeviceSideName;
+  if (DeviceMC->shouldMangleDeclName(ND)) {
+    SmallString<256> Buffer;
+    llvm::raw_svector_ostream Out(Buffer);
+    DeviceMC->mangleName(ND, Out);
+    DeviceSideName = Out.str();
+  } else
+    DeviceSideName = ND->getIdentifier()->getName();
+  return DeviceSideName;
+}
+
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
                                      FunctionArgList &Args) {
-  EmittedKernels.push_back(CGF.CurFn);
+  assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() ||
+         CGF.CGM.getContext().getTargetInfo().getCXXABI() !=
+             CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI());
+
+  EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
                          CudaFeature::CUDA_USES_NEW_LAUNCH))
     emitDeviceStubBodyNew(CGF, Args);
@@ -367,13 +399,19 @@ llvm::Function *CGNVCUDARuntime::makeReg
   // __cuda_register_globals() and generate __cudaRegisterFunction() call for
   // each emitted kernel.
   llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
-  for (llvm::Function *Kernel : EmittedKernels) {
-    llvm::Constant *KernelName = makeConstantString(Kernel->getName());
+  for (auto &&I : EmittedKernels) {
+    llvm::Constant *KernelName = makeConstantString(getDeviceSideName(I.D));
     llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
     llvm::Value *Args[] = {
-        &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy),
-        KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr,
-        NullPtr, NullPtr, NullPtr,
+        &GpuBinaryHandlePtr,
+        Builder.CreateBitCast(I.Kernel, VoidPtrTy),
+        KernelName,
+        KernelName,
+        llvm::ConstantInt::get(IntTy, -1),
+        NullPtr,
+        NullPtr,
+        NullPtr,
+        NullPtr,
         llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
     Builder.CreateCall(RegisterFunc, Args);
   }
@@ -386,10 +424,10 @@ llvm::Function *CGNVCUDARuntime::makeReg
   llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(IntTy, RegisterVarParams, false),
       addUnderscoredPrefixToName("RegisterVar"));
-  for (auto &Pair : DeviceVars) {
-    llvm::GlobalVariable *Var = Pair.first;
-    unsigned Flags = Pair.second;
-    llvm::Constant *VarName = makeConstantString(Var->getName());
+  for (auto &&Info : DeviceVars) {
+    llvm::GlobalVariable *Var = Info.Var;
+    unsigned Flags = Info.Flag;
+    llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
     uint64_t VarSize =
         CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
     llvm::Value *Args[] = {

Modified: cfe/trunk/lib/CodeGen/CGCUDARuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDARuntime.h?rev=354004&r1=354003&r2=354004&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDARuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGCUDARuntime.h Wed Feb 13 18:00:09 2019
@@ -23,6 +23,7 @@ class GlobalVariable;
 namespace clang {
 
 class CUDAKernelCallExpr;
+class VarDecl;
 
 namespace CodeGen {
 
@@ -52,7 +53,8 @@ public:
 
   /// Emits a kernel launch stub.
   virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
-  virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0;
+  virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
+                                 unsigned Flags) = 0;
 
   /// Constructs and returns a module initialization function or nullptr if it's
   /// not needed. Must be called after all kernels have been emitted.

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=354004&r1=354003&r2=354004&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Wed Feb 13 18:00:09 2019
@@ -3635,7 +3635,7 @@ void CodeGenModule::EmitGlobalVarDefinit
         // Extern global variables will be registered in the TU where they are
         // defined.
         if (!D->hasExternalStorage())
-          getCUDARuntime().registerDeviceVar(*GV, Flags);
+          getCUDARuntime().registerDeviceVar(D, *GV, Flags);
       } else if (D->hasAttr<CUDASharedAttr>())
         // __shared__ variables are odd. Shadows do get created, but
         // they are not registered with the CUDA runtime, so they

Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=354004&r1=354003&r2=354004&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Wed Feb 13 18:00:09 2019
@@ -2,7 +2,7 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=8.0 -fcuda-include-gpubinary %t -o - \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s \
-// RUN:       --check-prefixes=ALL,NORDC,CUDA,CUDANORDC,CUDA-OLD
+// RUN:       --check-prefixes=ALL,LNX,NORDC,CUDA,CUDANORDC,CUDA-OLD
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=8.0  -fcuda-include-gpubinary %t \
 // RUN:     -o - -DNOGLOBALS \
@@ -12,7 +12,7 @@
 // RUN:     -target-sdk-version=8.0 -fgpu-rdc -fcuda-include-gpubinary %t \
 // RUN:     -o - \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s \
-// RUN:       --check-prefixes=ALL,RDC,CUDA,CUDARDC,CUDA-OLD
+// RUN:       --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA-OLD
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=8.0 -o - \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
@@ -20,7 +20,7 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s       \
 // RUN:     -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - \
 // RUN:   | FileCheck %s -allow-deprecated-dag-overlap \
-// RUN:       --check-prefixes=ALL,NORDC,CUDA,CUDANORDC,CUDA-NEW
+// RUN:       --check-prefixes=ALL,LNX,NORDC,CUDA,CUDANORDC,CUDA-NEW
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s \
@@ -28,56 +28,70 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
 // RUN:   | FileCheck %s -allow-deprecated-dag-overlap \
-// RUN:       --check-prefixes=ALL,RDC,CUDA,CUDARDC,CUDA_NEW
+// RUN:       --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=9.2 -o - \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -fcuda-include-gpubinary %t -o - -x hip\
-// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
+// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,NORDC,HIP,HIPEF
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -fcuda-include-gpubinary %t -o -  -DNOGLOBALS -x hip \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \
-// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
+// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,NORDC,HIP,HIPEF
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
-// RUN:   | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,NORDC,HIP,HIPNEF
+// RUN:   | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,LNX,NORDC,HIP,HIPNEF
+
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -aux-triple amdgcn -emit-llvm %s \
+// RUN:     -fcuda-include-gpubinary %t -o - -x hip\
+// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN
 
 #include "Inputs/cuda.h"
 
 #ifndef NOGLOBALS
-// ALL-DAG: @device_var = internal global i32
+// LNX-DAG: @device_var = internal global i32
+// WIN-DAG: @"?device_var@@3HA" = internal global i32
 __device__ int device_var;
 
-// ALL-DAG: @constant_var = internal global i32
+// LNX-DAG: @constant_var = internal global i32
+// WIN-DAG: @"?constant_var@@3HA" = internal global i32
 __constant__ int constant_var;
 
-// ALL-DAG: @shared_var = internal global i32
+// LNX-DAG: @shared_var = internal global i32
+// WIN-DAG: @"?shared_var@@3HA" = internal global i32
 __shared__ int shared_var;
 
 // Make sure host globals don't get internalized...
-// ALL-DAG: @host_var = global i32
+// LNX-DAG: @host_var = global i32
+// WIN-DAG: @"?host_var@@3HA" = dso_local global i32
 int host_var;
 // ... and that extern vars remain external.
-// ALL-DAG: @ext_host_var = external global i32
+// LNX-DAG: @ext_host_var = external global i32
+// WIN-DAG: @"?ext_host_var@@3HA" = external dso_local global i32
 extern int ext_host_var;
 
 // external device-side variables -> extern references to their shadows.
-// ALL-DAG: @ext_device_var = external global i32
+// LNX-DAG: @ext_device_var = external global i32
+// WIN-DAG: @"?ext_device_var@@3HA" = external dso_local global i32
 extern __device__ int ext_device_var;
-// ALL-DAG: @ext_device_var = external global i32
+// LNX-DAG: @ext_device_var = external global i32
+// WIN-DAG: @"?ext_constant_var@@3HA" = external dso_local global i32
 extern __constant__ int ext_constant_var;
 
 // external device-side variables with definitions should generate
 // definitions for the shadows.
-// ALL-DAG: @ext_device_var_def = internal global i32 undef,
+// LNX-DAG: @ext_device_var_def = internal global i32 undef,
+// WIN-DAG: @"?ext_device_var_def@@3HA" = internal global i32 undef
 extern __device__ int ext_device_var_def;
 __device__ int ext_device_var_def = 1;
-// ALL-DAG: @ext_device_var_def = internal global i32 undef,
+// LNX-DAG: @ext_device_var_def = internal global i32 undef,
+// WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef
 __constant__ int ext_constant_var_def = 2;
 
+
 void use_pointers() {
   int *p;
   p = &device_var;
@@ -90,8 +104,15 @@ void use_pointers() {
 }
 
 // Make sure that all parts of GPU code init/cleanup are there:
-// * constant unnamed string with the kernel name
-// ALL: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
+// * constant unnamed string with the device-side kernel name to be passed to
+//   __hipRegisterFunction/__cudaRegisterFunction.
+// ALL: @0 = private unnamed_addr constant [18 x i8] c"_Z10kernelfunciii\00"
+// * constant unnamed string with the device-side kernel name to be passed to
+//   __hipRegisterVar/__cudaRegisterVar.
+// ALL: @1 = private unnamed_addr constant [11 x i8] c"device_var\00"
+// ALL: @2 = private unnamed_addr constant [13 x i8] c"constant_var\00"
+// ALL: @3 = private unnamed_addr constant [19 x i8] c"ext_device_var_def\00"
+// ALL: @4 = private unnamed_addr constant [21 x i8] c"ext_constant_var_def\00"
 // * constant unnamed string with GPU binary
 // CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
 // HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
@@ -100,13 +121,13 @@ void use_pointers() {
 // CUDARDC-SAME: section "__nv_relfatbin", align 8
 // * constant struct that wraps GPU binary
 // ALL: @__[[PREFIX:cuda|hip]]_fatbin_wrapper = internal constant
-// ALL-SAME: { i32, i32, i8*, i8* }
+// LNX-SAME: { i32, i32, i8*, i8* }
 // CUDA-SAME: { i32 1180844977, i32 1,
 // HIP-SAME: { i32 1212764230, i32 1,
 // CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
 // HIPEF-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
 // HIPNEF-SAME:  i8* @[[FATBIN]],
-// ALL-SAME: i8* null }
+// LNX-SAME: i8* null }
 // CUDA-SAME: section ".nvFatBinSegment"
 // HIP-SAME: section ".hipFatBinSegment"
 // * variable to save GPU binary handle after initialization
@@ -116,7 +137,7 @@ void use_pointers() {
 // RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
 // CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
 // * Make sure our constructor was added to global ctor list.
-// ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
+// LNX: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
 // * Alias to global symbol containing the NVModuleID.
 // RDC: @__fatbinwrap[[MODULE_ID]] = alias { i32, i32, i8*, i8* }
 // RDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
@@ -124,7 +145,7 @@ void use_pointers() {
 // Test that we build the correct number of calls to cudaSetupArgument followed
 // by a call to cudaLaunch.
 
-// ALL: define{{.*}}kernelfunc
+// LNX: define{{.*}}kernelfunc
 
 // New launch sequence stores arguments into local buffer and passes array of
 // pointers to them directly to cudaLaunchKernel
@@ -149,25 +170,25 @@ void use_pointers() {
 __global__ void kernelfunc(int i, int j, int k) {}
 
 // Test that we've built correct kernel launch sequence.
-// ALL: define{{.*}}hostfunc
+// LNX: define{{.*}}hostfunc
 // CUDA-OLD: call{{.*}}[[PREFIX]]ConfigureCall
 // CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
 // HIP: call{{.*}}[[PREFIX]]ConfigureCall
-// ALL: call{{.*}}kernelfunc
+// LNX: call{{.*}}kernelfunc
 void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 #endif
 
 // Test that we've built a function to register kernels and global vars.
 // ALL: define internal void @__[[PREFIX]]_register_globals
-// ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{.*}}i32 0, i32 4, i32 0, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{.*}}i32 0, i32 4, i32 1, i32 0
+// ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc{{[^,]*}}, {{[^@]*}}@0
+// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{[^,]*}}, {{[^@]*}}@1, {{.*}}i32 0, i32 4, i32 0, i32 0
+// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, i32 4, i32 1, i32 0
+// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, i32 4, i32 0, i32 0
+// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, i32 4, i32 1, i32 0
 // ALL: ret void
 
 // Test that we've built a constructor.
-// ALL: define internal void @__[[PREFIX]]_module_ctor
+// LNX: define internal void @__[[PREFIX]]_module_ctor
 
 // In separate mode it calls __[[PREFIX]]RegisterFatBinary(&__[[PREFIX]]_fatbin_wrapper)
 // HIP only register fat binary once.




More information about the cfe-commits mailing list