[clang] [llvm] [Clang][CUDA] Add support for __managed__ variables in non-RDC and default RDC mode (PR #149716)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 02:48:18 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-driver
Author: Acthinks Yang (Acthinks)
<details>
<summary>Changes</summary>
This change adds support for __managed__ variables in:
1. Non-RDC (Relocatable Device Code) compilation mode
2. Default RDC mode (which uses the new offload driver)
Support for __managed__ variables in legacy RDC mode (without the new offload driver)
is not yet implemented and remains a TODO item.
Closes #<!-- -->147373
---
Patch is 49.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149716.diff
10 Files Affected:
- (modified) clang/include/clang/Basic/Attr.td (+1-1)
- (modified) clang/lib/CodeGen/CGCUDANV.cpp (+45-11)
- (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+11-4)
- (modified) clang/test/CodeGenCUDA/Inputs/cuda.h (-2)
- (modified) clang/test/CodeGenCUDA/anon-ns.cu (+4-8)
- (modified) clang/test/CodeGenCUDA/device-var-linkage.cu (+40-34)
- (modified) clang/test/CodeGenCUDA/managed-var.cu (+81-36)
- (modified) clang/test/CodeGenCUDA/offloading-entries.cu (+52-50)
- (modified) clang/test/Driver/linker-wrapper-image.c (+13-2)
- (modified) llvm/lib/Frontend/Offloading/OffloadWrapper.cpp (+45-7)
``````````diff
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 224cb6a32af28..9ecdf2322ab64 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1562,7 +1562,7 @@ def CUDAGridConstant : InheritableAttr {
def HIPManaged : InheritableAttr {
let Spellings = [GNU<"managed">, Declspec<"__managed__">];
let Subjects = SubjectList<[Var]>;
- let LangOpts = [HIP];
+ let LangOpts = [HIP, CUDA];
let Documentation = [HIPManagedAttrDocs];
}
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index dd26be74e561b..2a71b90a808d1 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -42,7 +42,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
StringRef Prefix;
private:
- llvm::IntegerType *IntTy, *SizeTy;
+ llvm::IntegerType *IntTy, *SizeTy, *CharTy;
llvm::Type *VoidTy;
llvm::PointerType *PtrTy;
@@ -231,6 +231,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
SizeTy = CGM.SizeTy;
VoidTy = CGM.VoidTy;
PtrTy = CGM.UnqualPtrTy;
+ CharTy = CGM.CharTy;
if (CGM.getLangOpts().OffloadViaLLVM)
Prefix = "llvm";
@@ -547,10 +548,11 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
}
// Replace the original variable Var with the address loaded from variable
-// ManagedVar populated by HIP runtime.
+// ManagedVar populated by HIP/CUDA runtime.
static void replaceManagedVar(llvm::GlobalVariable *Var,
llvm::GlobalVariable *ManagedVar) {
SmallVector<SmallVector<llvm::User *, 8>, 8> WorkList;
+
for (auto &&VarUse : Var->uses()) {
WorkList.push_back({VarUse.getUser()});
}
@@ -661,8 +663,15 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
addUnderscoredPrefixToName("RegisterVar"));
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
// size_t, unsigned)
- llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
- PtrTy, VarSizeTy, IntTy};
+ // void __cudaRegisterManagedVar(void **, void **, char *, const char *,
+ // int, size_t, int, int)
+ SmallVector<llvm::Type *, 8> RegisterManagedVarParams;
+ if (CGM.getLangOpts().HIP)
+ RegisterManagedVarParams = {PtrTy, PtrTy, PtrTy, PtrTy, VarSizeTy, IntTy};
+ else
+ RegisterManagedVarParams = {PtrTy, PtrTy, PtrTy, PtrTy,
+ IntTy, VarSizeTy, IntTy, IntTy};
+
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
addUnderscoredPrefixToName("RegisterManagedVar"));
@@ -693,13 +702,23 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
"HIP managed variables not transformed");
auto *ManagedVar = CGM.getModule().getNamedGlobal(
Var->getName().drop_back(StringRef(".managed").size()));
- llvm::Value *Args[] = {
- &GpuBinaryHandlePtr,
- ManagedVar,
- Var,
- VarName,
- llvm::ConstantInt::get(VarSizeTy, VarSize),
- llvm::ConstantInt::get(IntTy, Var->getAlignment())};
+ SmallVector<llvm::Value *, 8> Args;
+ if (CGM.getLangOpts().HIP)
+ Args = {&GpuBinaryHandlePtr,
+ ManagedVar,
+ Var,
+ VarName,
+ llvm::ConstantInt::get(VarSizeTy, VarSize),
+ llvm::ConstantInt::get(IntTy, Var->getAlignment())};
+ else
+ Args = {&GpuBinaryHandlePtr,
+ ManagedVar,
+ VarName,
+ VarName,
+ llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
+ llvm::ConstantInt::get(VarSizeTy, VarSize),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
+ llvm::ConstantInt::get(IntTy, 0)};
if (!Var->isDeclaration())
Builder.CreateCall(RegisterManagedVar, Args);
} else {
@@ -965,6 +984,18 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
"__cudaRegisterFatBinaryEnd");
CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
}
+ // Call __cudaInitModule(GpuBinaryHandle) for managed variables
+ for (auto &&Info : DeviceVars) {
+ llvm::GlobalVariable *Var = Info.Var;
+ if (!Var->isDeclaration() && Info.Flags.isManaged()) {
+ llvm::FunctionCallee NvInitManagedRtWithModule =
+ CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(CharTy, PtrTy, false),
+ "__cudaInitModule");
+ CtorBuilder.CreateCall(NvInitManagedRtWithModule, GpuBinaryHandle);
+ break;
+ }
+ }
} else {
// Generate a unique module ID.
SmallString<64> ModuleID;
@@ -1158,6 +1189,9 @@ void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
// transformed managed variable. The transformed managed variable contains
// the address of managed memory which will be allocated by the runtime.
void CGNVCUDARuntime::transformManagedVars() {
+ // CUDA managed variables directly access in device code
+ if (!CGM.getLangOpts().HIP && CGM.getLangOpts().CUDAIsDevice)
+ return;
for (auto &&Info : DeviceVars) {
llvm::GlobalVariable *Var = Info.Var;
if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 82bdfe2666b52..ceda4cb35a715 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -241,19 +241,26 @@ RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
void NVPTXTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
- if (GV->isDeclaration())
- return;
+
const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
if (VD) {
if (M.getLangOpts().CUDA) {
- if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
+ if (!GV->isDeclaration() &&
+ VD->getType()->isCUDADeviceBuiltinSurfaceType())
addNVVMMetadata(GV, "surface", 1);
- else if (VD->getType()->isCUDADeviceBuiltinTextureType())
+ else if (!GV->isDeclaration() &&
+ VD->getType()->isCUDADeviceBuiltinTextureType())
addNVVMMetadata(GV, "texture", 1);
+ // nvlink asserts managed attribute match in decl and def
+ else if (VD->hasAttr<HIPManagedAttr>())
+ addNVVMMetadata(GV, "managed", 1);
return;
}
}
+ if (GV->isDeclaration())
+ return;
+
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h
index dc85eae0c5178..4630060852d21 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -8,9 +8,7 @@
#define __global__ __attribute__((global))
#define __host__ __attribute__((host))
#define __shared__ __attribute__((shared))
-#if __HIP__
#define __managed__ __attribute__((managed))
-#endif
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
#define __grid_constant__ __attribute__((grid_constant))
#else
diff --git a/clang/test/CodeGenCUDA/anon-ns.cu b/clang/test/CodeGenCUDA/anon-ns.cu
index d931f31d0207c..d7398ab71502a 100644
--- a/clang/test/CodeGenCUDA/anon-ns.cu
+++ b/clang/test/CodeGenCUDA/anon-ns.cu
@@ -34,26 +34,26 @@
// CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
// CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
// CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
+// CUDA-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized constant
// CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
// COMMON-DAG: @_ZN12_GLOBAL__N_12vdE = internal addrspace(1) global
// COMNEG-NOT: @{{.*}} = {{.*}} c"_ZN12_GLOBAL__N_12vdE{{.*}}\00"
-// HIP-DAG: @llvm.compiler.used = {{.*}}@[[VM]]{{.*}}@[[VT]]{{.*}}@[[VC]]
-// CUDA-DAG: @llvm.compiler.used = {{.*}}@[[VT]]{{.*}}@[[VC]]
+// COMMON-DAG: @llvm.compiler.used = {{.*}}@[[VM]]{{.*}}@[[VT]]{{.*}}@[[VC]]
// COMMON-DAG: @[[KERNSTR:.*]] = {{.*}} c"[[KERN]]\00"
// COMMON-DAG: @[[KTXSTR:.*]] = {{.*}} c"[[KTX]]\00"
// COMMON-DAG: @[[KTLSTR:.*]] = {{.*}} c"[[KTL]]\00"
-// HIP-DAG: @[[VMSTR:.*]] = {{.*}} c"[[VM]]\00"
+// COMMON-DAG: @[[VMSTR:.*]] = {{.*}} c"[[VM]]\00"
// COMMON-DAG: @[[VCSTR:.*]] = {{.*}} c"[[VC]]\00"
// COMMON-DAG: @[[VTSTR:.*]] = {{.*}} c"[[VT]]\00"
// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KERNSTR]]
// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KTXSTR]]
// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KTLSTR]]
-// HIP-DAG: call void @__{{.*}}RegisterManagedVar({{.*}}@[[VMSTR]]
+// COMMON-DAG: call void @__{{.*}}RegisterManagedVar({{.*}}@[[VMSTR]]
// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[VCSTR]]
// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[VTSTR]]
@@ -67,9 +67,7 @@ namespace {
struct X {};
X x;
auto lambda = [](){};
-#if __HIP__
__managed__ int vm = 1;
-#endif
__constant__ int vc = 2;
// C should not be externalized since it is used by device code only.
@@ -89,9 +87,7 @@ void test() {
// A, B, and tempVar<X> should be externalized since they are
// used by host code.
-#if __HIP__
getSymbol(&vm);
-#endif
getSymbol(&vc);
getSymbol(&vt<X>);
}
diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu
index 4c57323d85f9d..1acd5cd993b31 100644
--- a/clang/test/CodeGenCUDA/device-var-linkage.cu
+++ b/clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -1,18 +1,29 @@
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -emit-llvm -o - -x hip %s \
-// RUN: | FileCheck -check-prefixes=DEV,NORDC %s
+// RUN: | FileCheck -check-prefixes=DEV,HIP-D,NORDC,HIP-NORDC %s
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
-// RUN: | FileCheck -check-prefixes=DEV,RDC %s
+// RUN: | FileCheck -check-prefixes=DEV,HIP-D %s
// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
// RUN: -emit-llvm -o - -x hip %s \
-// RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s
+// RUN: | FileCheck -check-prefixes=HOST,HIP-H,NORDC-H %s
// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
-// RUN: | FileCheck -check-prefixes=HOST,RDC-H %s
+// RUN: | FileCheck -check-prefixes=HOST,HIP-H,RDC-H %s
+
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
-// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - %s \
-// RUN: | FileCheck -check-prefixes=CUDA %s
+// RUN: -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefixes=DEV,CUDA-D,NORDC,CUDA-NORDC %s
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefixes=DEV,CUDA-D %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
+// RUN: -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s
+// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
+// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefixes=HOST,RDC-H %s
#include "Inputs/cuda.h"
@@ -24,12 +35,11 @@ __device__ int v1;
// NORDC-H-DAG: @v2 = internal global i32 undef
// RDC-H-DAG: @v2 = global i32 undef
__constant__ int v2;
-// DEV-DAG: @v3 = addrspace(1) externally_initialized global ptr addrspace(1) null
+// HIP-D-DAG: @v3 = addrspace(1) externally_initialized global ptr addrspace(1) null
+// CUDA-D-DAG: @v3 = addrspace(1) externally_initialized global i32 0, align 4
// NORDC-H-DAG: @v3 = internal externally_initialized global ptr null
// RDC-H-DAG: @v3 = externally_initialized global ptr null
-#if __HIP__
__managed__ int v3;
-#endif
// DEV-DAG: @ev1 = external addrspace(1) global i32
// HOST-DAG: @ev1 = external global i32
@@ -37,45 +47,41 @@ extern __device__ int ev1;
// DEV-DAG: @ev2 = external addrspace(4) global i32
// HOST-DAG: @ev2 = external global i32
extern __constant__ int ev2;
-// DEV-DAG: @ev3 = external addrspace(1) externally_initialized global ptr addrspace(1)
+// HIP-D-DAG: @ev3 = external addrspace(1) externally_initialized global ptr addrspace(1)
+// CUDA-D-DAG: @ev3 = external addrspace(1) global i32, align 4
// HOST-DAG: @ev3 = external externally_initialized global ptr
-#if __HIP__
extern __managed__ int ev3;
-#endif
// NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// HIP-RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// CUDA-RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
// HOST-DAG: @_ZL3sv1 = internal global i32 undef
-// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
static __device__ int sv1;
// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized constant i32 0
-// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0
+// HIP-RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0
+// CUDA-RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized constant i32 0
// HOST-DAG: @_ZL3sv2 = internal global i32 undef
-// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized constant i32 0
static __constant__ int sv2;
-// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null
-// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global ptr addrspace(1) null
+// HIP-NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null
+// CUDA-NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 0, align 4
+// HIP-RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global ptr addrspace(1) null
+// CUDA-RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 0, align 4
// HOST-DAG: @_ZL3sv3 = internal externally_initialized global ptr null
-#if __HIP__
static __managed__ int sv3;
-#endif
__device__ __host__ int work(int *x);
__device__ __host__ int fun1() {
- return work(&ev1) + work(&ev2) + work(&sv1) + work(&sv2)
-#if __HIP__
- + work(&ev3) + work(&sv3)
-#endif
- ;
+ return work(&ev1) + work(&ev2) + work(&sv1) + work(&sv2) +
+ work(&ev3) + work(&sv3);
}
-// HOST: hipRegisterVar({{.*}}@v1
-// HOST: hipRegisterVar({{.*}}@v2
-// HOST: hipRegisterManagedVar({{.*}}@v3
-// HOST-NOT: hipRegisterVar({{.*}}@ev1
-// HOST-NOT: hipRegisterVar({{.*}}@ev2
-// HOST-NOT: hipRegisterManagedVar({{.*}}@ev3
-// HOST: hipRegisterVar({{.*}}@_ZL3sv1
-// HOST: hipRegisterVar({{.*}}@_ZL3sv2
-// HOST: hipRegisterManagedVar({{.*}}@_ZL3sv3
+// HIP-H: hipRegisterVar({{.*}}@v1
+// HIP-H: hipRegisterVar({{.*}}@v2
+// HIP-H: hipRegisterManagedVar({{.*}}@v3
+// HIP-H-NOT: hipRegisterVar({{.*}}@ev1
+// HIP-H-NOT: hipRegisterVar({{.*}}@ev2
+// HIP-H-NOT: hipRegisterManagedVar({{.*}}@ev3
+// HIP-H: hipRegisterVar({{.*}}@_ZL3sv1
+// HIP-H: hipRegisterVar({{.*}}@_ZL3sv2
+// HIP-H: hipRegisterManagedVar({{.*}}@_ZL3sv3
diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 0e7a7be85ac8e..6d60a0b079cb4 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -1,32 +1,57 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
// RUN: -emit-llvm -o - -x hip %s | FileCheck \
-// RUN: -check-prefixes=COMMON,DEV,NORDC-D %s
+// RUN: -check-prefixes=COMMON,DEV,HIP-D,HIP-NORDC-D %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev
-// RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,RDC-D %s
+// RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,HIP-D,HIP-RDC-D %s
// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
// RUN: -emit-llvm -o - -x hip %s | FileCheck \
-// RUN: -check-prefixes=COMMON,HOST,NORDC %s
+// RUN: -check-prefixes=COMMON,HOST,HIP-H,NORDC %s
// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.host
-// RUN: cat %t.host | FileCheck -check-prefixes=COMMON,HOST,RDC %s
+// RUN: cat %t.host | FileCheck -check-prefixes=COMMON,HOST,HIP-H,RDC,HIP-RDC %s
// Check device and host compilation use the same postfix for static
// variable name.
// RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s
+// RUN: %clang_cc1 -triple nvptx64 -fcuda-is-device -std=c++11 \
+// RUN: -emit-llvm -o - -x cuda %s | FileCheck \
+// RUN: -check-prefixes=COMMON,DEV,CUDA-D,CUDA-NORDC-D %s
+
+// RUN: %clang_cc1 -triple nvptx64 -fcuda-is-device -std=c++11 \
+// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x cuda %s > %t.dev
+// RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,CUDA-D,CUDA-RDC-D %s
+
+// RUN: echo "GPU binary" > %t.fatbin
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN: -emit-llvm -o - -x cuda %s -fcuda-include-gpubinary %t.fatbin \
+// RUN: | FileCheck -check-prefixes=COMMON,HOST,CUDA-H,NORDC,CUDA-NORDC %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x cuda %s \
+// RUN: -fcuda-include-gpubinary %t.fatbin > %t.host
+// RUN: cat %t.host \
+// RUN: | FileCheck -check-prefixes=COMMON,HOST,CUDA-H,RDC,CUDA-RDC %s
+
+// Check device and host compilation use the same postfix for static
+// variable name.
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefix=CUDA-POSTFIX %s
+
#include "Inputs/cuda.h"
struct vec {
float x,y,z;
};
-// DEV-DAG: @x.managed = addrspace(1) externally_initialized global i32 1, align 4
-// DEV-DAG: @x = addrspace(1) externally_initialized global ptr addrspace(1) null
+// HIP-D-DAG: @x.managed = addrspace(1) externally_initialized global i32 1, align 4
+// HIP-D-DAG: @x = addrspace(1) externally_initialized global ptr addrspace(1) null
+// CUDA-D-DAG: @x = addrspace(1) externally_initialized global i32 1, align 4
// NORDC-DAG: @x.managed = internal global i32 1
// RDC-DAG: @x.managed = global i32 1
// NORDC-DAG: @x = internal externally_initialized global ptr null
@@ -34,31 +59,41 @@ struct vec {
// HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"x\00"
__managed__ int x = 1;
-// DEV-DAG: @v.managed = addrspace(1) externally_initialized global [100 x %struct.vec] zeroinitializer, align 4
-// DEV-DAG: @v = addrspace(1) externally_initialized global ptr addrspace(1) null
+// HIP-D-DAG: @v.managed = addrspace(1) externally_initialized global [100 x %struct.vec] zeroinitializer, align 4
+// HIP-D-DAG: @v = addrspace(1) externally_initialized global ptr addrspace(1) null
+// CUDA-D-DAG: @v = addrspace(1) externally_initialized global [100 x %struct.vec] zeroinitializer, align 4
__managed__ vec v[100];
-// DEV-DAG: @v2.managed = addrspace(1) externally_initialized global <{ %struct.vec, [99 x %struct.vec] }> <{ %struct.vec { float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 }, [99 x %struct.vec] zeroinitializer }>, align 4
-// DEV-DAG: @v2 = addrspace(1) externally_initialized global ptr addrspace(1) null
+// HIP-D-DAG: @v2.managed = addrspace(1) externally_initialized global <{ %struct.vec, [99 x %struct.vec] }> <{ %struct.vec { float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 }, [99 x %struct.vec] zeroinitializer }>, align 4
+// HIP-D-DAG: @v2 = addrspace(1) externally_initialized global ptr addrspace(1) null
+// CUDA-D-DAG: @v2 = addrspace(1) externally_initialized global <{ %struct.vec, [99 x %struct.vec] }> <{ %struct.vec { float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 }, [99 x %struct.vec]...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/149716
More information about the llvm-commits
mailing list