[clang] b952d79 - [cuda][hip] Fix `RegisterVar` function prototype.
Michael Liao via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 3 09:57:19 PDT 2020
Author: Michael Liao
Date: 2020-04-03T12:57:09-04:00
New Revision: b952d799cacdb7efd44c1c9468bb11471cc16874
URL: https://github.com/llvm/llvm-project/commit/b952d799cacdb7efd44c1c9468bb11471cc16874
DIFF: https://github.com/llvm/llvm-project/commit/b952d799cacdb7efd44c1c9468bb11471cc16874.diff
LOG: [cuda][hip] Fix `RegisterVar` function prototype.
Summary:
- `RegisterVar` has `void` return type and `size_t` in its variable size
parameter in HIP or CUDA 9.0+.
Reviewers: tra, yaxunl
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D77398
Added:
Modified:
clang/include/clang/Basic/Cuda.h
clang/lib/Basic/Cuda.cpp
clang/lib/CodeGen/CGCUDANV.cpp
clang/test/CodeGenCUDA/device-stub.cu
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index da572957d10d..c2ebf8734304 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -117,6 +117,7 @@ enum class CudaFeature {
CUDA_USES_FATBIN_REGISTER_END,
};
+CudaVersion ToCudaVersion(llvm::VersionTuple);
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature);
bool CudaFeatureEnabled(CudaVersion, CudaFeature);
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index e06d120c58bf..74eb5473b71d 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -362,7 +362,7 @@ CudaVersion MaxVersionForCudaArch(CudaArch A) {
}
}
-static CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
+CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
int IVer =
Version.getMajor() * 10 + Version.getMinor().getValueOr(0);
switch(IVer) {
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 6d92ef33b885..351c5058aa4c 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -440,13 +440,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
Builder.CreateCall(RegisterFunc, Args);
}
+ llvm::Type *VarSizeTy = IntTy;
+ // For HIP or CUDA 9.0+, device variable size is type of `size_t`.
+ if (CGM.getLangOpts().HIP ||
+ ToCudaVersion(CGM.getTarget().getSDKVersion()) >= CudaVersion::CUDA_90)
+ VarSizeTy = SizeTy;
+
// void __cudaRegisterVar(void **, char *, char *, const char *,
// int, int, int, int)
llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
- CharPtrTy, IntTy, IntTy,
+ CharPtrTy, IntTy, VarSizeTy,
IntTy, IntTy};
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, RegisterVarParams, false),
+ llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
addUnderscoredPrefixToName("RegisterVar"));
// void __cudaRegisterSurface(void **, const struct surfaceReference *,
// const void **, const char *, int, int);
@@ -476,7 +482,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
VarName,
VarName,
llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
- llvm::ConstantInt::get(IntTy, VarSize),
+ llvm::ConstantInt::get(VarSizeTy, VarSize),
llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
llvm::ConstantInt::get(IntTy, 0)};
Builder.CreateCall(RegisterVar, Args);
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 9db5738cdede..0f4a5644fd48 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -181,10 +181,10 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// Test that we've built a function to register kernels and global vars.
// ALL: define internal void @__[[PREFIX]]_register_globals
// ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc{{[^,]*}}, {{[^@]*}}@0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{[^,]*}}, {{[^@]*}}@1, {{.*}}i32 0, i32 4, i32 0, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, i32 4, i32 1, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, i32 4, i32 0, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, i32 4, i32 1, i32 0
+// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{[^,]*}}, {{[^@]*}}@1, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0
+// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
+// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0
+// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
// ALL: ret void
// Test that we've built a constructor.
More information about the cfe-commits
mailing list