r352799 - [CUDA] add support for the new kernel launch API in CUDA-9.2+.
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 31 13:34:04 PST 2019
Author: tra
Date: Thu Jan 31 13:34:03 2019
New Revision: 352799
URL: http://llvm.org/viewvc/llvm-project?rev=352799&view=rev
Log:
[CUDA] add support for the new kernel launch API in CUDA-9.2+.
Instead of calling CUDA runtime to arrange function arguments,
the new API constructs arguments in a local array and the kernels
are launched with __cudaLaunchKernel().
The old API has been deprecated and is expected to go away
in the next CUDA release.
Differential Revision: https://reviews.llvm.org/D57488
Modified:
cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
cfe/trunk/include/clang/Sema/Sema.h
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h
cfe/trunk/lib/Sema/SemaCUDA.cpp
cfe/trunk/lib/Sema/SemaDecl.cpp
cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h
cfe/trunk/test/CodeGenCUDA/device-stub.cu
cfe/trunk/test/CodeGenCUDA/kernel-args-alignment.cu
cfe/trunk/test/CodeGenCUDA/kernel-call.cu
cfe/trunk/test/Driver/cuda-simple.cu
cfe/trunk/test/SemaCUDA/Inputs/cuda.h
cfe/trunk/test/SemaCUDA/config-type.cu
cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h
Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Thu Jan 31 13:34:03 2019
@@ -7143,7 +7143,7 @@ def err_kern_type_not_void_return : Erro
def err_kern_is_nonstatic_method : Error<
"kernel function %0 must be a free function or static member function">;
def err_config_scalar_return : Error<
- "CUDA special function 'cudaConfigureCall' must have scalar return type">;
+ "CUDA special function '%0' must have scalar return type">;
def err_kern_call_not_global_function : Error<
"kernel call to non-global function %0">;
def err_global_call_not_config : Error<
Modified: cfe/trunk/include/clang/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/include/clang/Sema/Sema.h (original)
+++ cfe/trunk/include/clang/Sema/Sema.h Thu Jan 31 13:34:03 2019
@@ -10348,6 +10348,11 @@ public:
/// Copies target attributes from the template TD to the function FD.
void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
+ /// Returns the name of the launch configuration function. This is the name
+ /// of the function that will be called to configure kernel call, with the
+ /// parameters specified via <<<>>>.
+ std::string getCudaConfigureFuncName() const;
+
/// \name Code completion
//@{
/// Describes the context in which code completion occurs.
Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Thu Jan 31 13:34:03 2019
@@ -15,6 +15,8 @@
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
+#include "clang/Basic/Cuda.h"
+#include "clang/CodeGen/CodeGenABITypes.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -102,7 +104,8 @@ private:
return DummyFunc;
}
- void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
+ void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
+ void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
public:
CGNVCUDARuntime(CodeGenModule &CGM);
@@ -187,11 +190,110 @@ llvm::FunctionType *CGNVCUDARuntime::get
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
FunctionArgList &Args) {
EmittedKernels.push_back(CGF.CurFn);
- emitDeviceStubBody(CGF, Args);
+ if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
+ CudaFeature::CUDA_USES_NEW_LAUNCH))
+ emitDeviceStubBodyNew(CGF, Args);
+ else
+ emitDeviceStubBodyLegacy(CGF, Args);
}
-void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
- FunctionArgList &Args) {
+// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local
+// array and kernels are launched using cudaLaunchKernel().
+void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
+ FunctionArgList &Args) {
+ // Build the shadow stack entry at the very start of the function.
+
+ // Calculate amount of space we will need for all arguments. If we have no
+ // args, allocate a single pointer so we still have a valid pointer to the
+ // argument array that we can pass to runtime, even if it will be unused.
+ Address KernelArgs = CGF.CreateTempAlloca(
+ VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
+ llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
+ // Store pointers to the arguments in a locally allocated launch_args.
+ for (unsigned i = 0; i < Args.size(); ++i) {
+ llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
+ llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
+ CGF.Builder.CreateDefaultAlignedStore(
+ VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i));
+ }
+
+ llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
+
+ // Lookup cudaLaunchKernel function.
+ // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ // void **args, size_t sharedMem,
+ // cudaStream_t stream);
+ TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
+ DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+ IdentifierInfo &cudaLaunchKernelII =
+ CGM.getContext().Idents.get("cudaLaunchKernel");
+ FunctionDecl *cudaLaunchKernelFD = nullptr;
+ for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
+ if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
+ cudaLaunchKernelFD = FD;
+ }
+
+ if (cudaLaunchKernelFD == nullptr) {
+ CGM.Error(CGF.CurFuncDecl->getLocation(),
+ "Can't find declaration for cudaLaunchKernel()");
+ return;
+ }
+ // Create temporary dim3 grid_dim, block_dim.
+ ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1);
+ QualType Dim3Ty = GridDimParam->getType();
+ Address GridDim =
+ CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim");
+ Address BlockDim =
+ CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
+ Address ShmemSize =
+ CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
+ Address Stream =
+ CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
+ llvm::Constant *cudaPopConfigFn = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy,
+ {/*gridDim=*/GridDim.getType(),
+ /*blockDim=*/BlockDim.getType(),
+ /*ShmemSize=*/ShmemSize.getType(),
+ /*Stream=*/Stream.getType()},
+ /*isVarArg=*/false),
+ "__cudaPopCallConfiguration");
+
+ CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
+ {GridDim.getPointer(), BlockDim.getPointer(),
+ ShmemSize.getPointer(), Stream.getPointer()});
+
+ // Emit the call to cudaLaunch
+ llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy);
+ CallArgList LaunchKernelArgs;
+ LaunchKernelArgs.add(RValue::get(Kernel),
+ cudaLaunchKernelFD->getParamDecl(0)->getType());
+ LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty);
+ LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty);
+ LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()),
+ cudaLaunchKernelFD->getParamDecl(3)->getType());
+ LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)),
+ cudaLaunchKernelFD->getParamDecl(4)->getType());
+ LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)),
+ cudaLaunchKernelFD->getParamDecl(5)->getType());
+
+ QualType QT = cudaLaunchKernelFD->getType();
+ QualType CQT = QT.getCanonicalType();
+ llvm::Type *Ty = CGM.getTypes().ConvertFunctionType(CQT, cudaLaunchKernelFD);
+ llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty);
+
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
+ llvm::Constant *cudaLaunchKernelFn =
+ CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
+ CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
+ LaunchKernelArgs);
+ CGF.EmitBranch(EndBlock);
+
+ CGF.EmitBlock(EndBlock);
+}
+
+void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
+ FunctionArgList &Args) {
// Emit a call to cudaSetupArgument for each arg in Args.
llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
Modified: cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h (original)
+++ cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h Thu Jan 31 13:34:03 2019
@@ -426,5 +426,15 @@ __device__ inline __cuda_builtin_gridDim
#pragma pop_macro("__USE_FAST_MATH__")
#pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
+// CUDA runtime uses this undocumented function to access kernel launch
+// configuration. The declaration is in crt/device_functions.h but that file
+// includes a lot of other stuff we don't want. Instead, we'll provide our own
+// declaration for it here.
+#if CUDA_VERSION >= 9020
+extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim,
+ size_t sharedMem = 0,
+ void *stream = 0);
+#endif
+
#endif // __CUDA__
#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__
Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaCUDA.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp Thu Jan 31 13:34:03 2019
@@ -13,6 +13,7 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
@@ -41,9 +42,8 @@ ExprResult Sema::ActOnCUDAExecConfigExpr
SourceLocation GGGLoc) {
FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
if (!ConfigDecl)
- return ExprError(
- Diag(LLLLoc, diag::err_undeclared_var_use)
- << (getLangOpts().HIP ? "hipConfigureCall" : "cudaConfigureCall"));
+ return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
+ << getCudaConfigureFuncName());
QualType ConfigQTy = ConfigDecl->getType();
DeclRefExpr *ConfigDR = new (Context)
@@ -957,3 +957,16 @@ void Sema::inheritCUDATargetAttrs(Functi
copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}
+
+std::string Sema::getCudaConfigureFuncName() const {
+ if (getLangOpts().HIP)
+ return "hipConfigureCall";
+
+ // New CUDA kernel launch sequence.
+ if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
+ CudaFeature::CUDA_USES_NEW_LAUNCH))
+ return "__cudaPushCallConfiguration";
+
+ // Legacy CUDA kernel configuration call
+ return "cudaConfigureCall";
+}
Modified: cfe/trunk/lib/Sema/SemaDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDecl.cpp?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaDecl.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDecl.cpp Thu Jan 31 13:34:03 2019
@@ -9146,13 +9146,12 @@ Sema::ActOnFunctionDeclarator(Scope *S,
if (getLangOpts().CUDA) {
IdentifierInfo *II = NewFD->getIdentifier();
- if (II &&
- II->isStr(getLangOpts().HIP ? "hipConfigureCall"
- : "cudaConfigureCall") &&
+ if (II && II->isStr(getCudaConfigureFuncName()) &&
!NewFD->isInvalidDecl() &&
NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
if (!R->getAs<FunctionType>()->getReturnType()->isScalarType())
- Diag(NewFD->getLocation(), diag::err_config_scalar_return);
+ Diag(NewFD->getLocation(), diag::err_config_scalar_return)
+ << getCudaConfigureFuncName();
Context.setcudaConfigureCallDecl(NewFD);
}
Modified: cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h (original)
+++ cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h Thu Jan 31 13:34:03 2019
@@ -15,13 +15,20 @@ struct dim3 {
};
typedef struct cudaStream *cudaStream_t;
-
+typedef enum cudaError {} cudaError_t;
#ifdef __HIP__
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
cudaStream_t stream = 0);
#else
-int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
- cudaStream_t stream = 0);
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
#endif
extern "C" __device__ int printf(const char*, ...);
Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Thu Jan 31 13:34:03 2019
@@ -1,14 +1,36 @@
// RUN: echo "GPU binary would be here" > %t
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN: -fcuda-include-gpubinary %t -o - \
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,CUDA,CUDANORDC
+// RUN: -target-sdk-version=8.0 -fcuda-include-gpubinary %t -o - \
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
+// RUN: --check-prefixes=ALL,NORDC,CUDA,CUDANORDC,CUDA-OLD
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN: -target-sdk-version=8.0 -fcuda-include-gpubinary %t \
+// RUN: -o - -DNOGLOBALS \
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
+// RUN: -check-prefixes=NOGLOBALS,CUDANOGLOBALS
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN: -target-sdk-version=8.0 -fgpu-rdc -fcuda-include-gpubinary %t \
+// RUN: -o - \
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
+// RUN: --check-prefixes=ALL,RDC,CUDA,CUDARDC,CUDA-OLD
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS
+// RUN: -target-sdk-version=8.0 -o - \
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN: -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - \
+// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
+// RUN: --check-prefixes=ALL,NORDC,CUDA,CUDANORDC,CUDA-NEW
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN: -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s \
+// RUN: --check-prefixes=NOGLOBALS,CUDANOGLOBALS
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
+// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
+// RUN: --check-prefixes=ALL,RDC,CUDA,CUDARDC,CUDA_NEW
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - \
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,RDC,CUDA,CUDARDC
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \
+// RUN: -target-sdk-version=9.2 -o - \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
@@ -103,15 +125,34 @@ void use_pointers() {
// by a call to cudaLaunch.
// ALL: define{{.*}}kernelfunc
-// ALL: call{{.*}}[[PREFIX]]SetupArgument
-// ALL: call{{.*}}[[PREFIX]]SetupArgument
-// ALL: call{{.*}}[[PREFIX]]SetupArgument
-// ALL: call{{.*}}[[PREFIX]]Launch
+
+// New launch sequence stores arguments into local buffer and passes array of
+// pointers to them directly to cudaLaunchKernel
+// CUDA-NEW: alloca
+// CUDA-NEW: store
+// CUDA-NEW: store
+// CUDA-NEW: store
+// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
+// CUDA-NEW: call{{.*}}cudaLaunchKernel
+
+// Legacy style launch sequence sets up arguments by passing them to
+// [cuda|hip]SetupArgument.
+// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
+// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
+// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
+// CUDA-OLD: call{{.*}}[[PREFIX]]Launch
+
+// HIP: call{{.*}}[[PREFIX]]SetupArgument
+// HIP: call{{.*}}[[PREFIX]]SetupArgument
+// HIP: call{{.*}}[[PREFIX]]SetupArgument
+// HIP: call{{.*}}[[PREFIX]]Launch
__global__ void kernelfunc(int i, int j, int k) {}
// Test that we've built correct kernel launch sequence.
// ALL: define{{.*}}hostfunc
-// ALL: call{{.*}}[[PREFIX]]ConfigureCall
+// CUDA-OLD: call{{.*}}[[PREFIX]]ConfigureCall
+// CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
+// HIP: call{{.*}}[[PREFIX]]ConfigureCall
// ALL: call{{.*}}kernelfunc
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
#endif
Modified: cfe/trunk/test/CodeGenCUDA/kernel-args-alignment.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-args-alignment.cu?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/kernel-args-alignment.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/kernel-args-alignment.cu Thu Jan 31 13:34:03 2019
@@ -1,8 +1,12 @@
-// RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \
-// RUN: FileCheck -check-prefix HOST -check-prefix CHECK %s
+// New CUDA kernel launch sequence does not require explicit specification of
+// size/offset for each argument, so only the old way is tested.
+//
+// RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN: -target-sdk-version=8.0 -o - %s \
+// RUN: | FileCheck -check-prefixes=HOST-OLD,CHECK %s
// RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \
-// RUN: -emit-llvm -o - %s | FileCheck -check-prefix DEVICE -check-prefix CHECK %s
+// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,CHECK %s
#include "Inputs/cuda.h"
@@ -27,9 +31,9 @@ static_assert(alignof(S) == 8, "Unexpect
// 1. offset 0, width 1
// 2. offset 8 (because alignof(S) == 8), width 16
// 3. offset 24, width 8
-// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 1, i64 0)
-// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 16, i64 8)
-// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24)
+// HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 1, i64 0)
+// HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 16, i64 8)
+// HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24)
// DEVICE-LABEL: @_Z6kernelc1SPi
// DEVICE-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32*
Modified: cfe/trunk/test/CodeGenCUDA/kernel-call.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-call.cu?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/kernel-call.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/kernel-call.cu Thu Jan 31 13:34:03 2019
@@ -1,5 +1,9 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CUDA,CHECK
-// RUN: %clang_cc1 -x hip -emit-llvm %s -o - | FileCheck %s --check-prefixes=HIP,CHECK
+// RUN: %clang_cc1 -target-sdk-version=8.0 -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefixes=CUDA-OLD,CHECK
+// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK
+// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefixes=HIP,CHECK
#include "Inputs/cuda.h"
@@ -7,14 +11,17 @@
// CHECK-LABEL: define{{.*}}g1
// HIP: call{{.*}}hipSetupArgument
// HIP: call{{.*}}hipLaunchByPtr
-// CUDA: call{{.*}}cudaSetupArgument
-// CUDA: call{{.*}}cudaLaunch
+// CUDA-OLD: call{{.*}}cudaSetupArgument
+// CUDA-OLD: call{{.*}}cudaLaunch
+// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
+// CUDA-NEW: call{{.*}}cudaLaunchKernel
__global__ void g1(int x) {}
// CHECK-LABEL: define{{.*}}main
int main(void) {
// HIP: call{{.*}}hipConfigureCall
- // CUDA: call{{.*}}cudaConfigureCall
+ // CUDA-OLD: call{{.*}}cudaConfigureCall
+ // CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
// CHECK: icmp
// CHECK: br
// CHECK: call{{.*}}g1
Modified: cfe/trunk/test/Driver/cuda-simple.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-simple.cu?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/test/Driver/cuda-simple.cu (original)
+++ cfe/trunk/test/Driver/cuda-simple.cu Thu Jan 31 13:34:03 2019
@@ -2,7 +2,7 @@
// http://llvm.org/PR22936
// RUN: %clang -nocudainc -nocudalib -Werror -fsyntax-only -c %s
//
-// Verify that we pass -x cuda-cpp-output to compiler after
+// Verify that we pass -x cuda-cpp-output to compiler after
// preprocessing a CUDA file
// RUN: %clang -Werror -### -save-temps -c %s 2>&1 | FileCheck %s
// CHECK: "-cc1"
@@ -14,7 +14,9 @@
// Verify that compiler accepts CUDA syntax with "-x cuda-cpp-output".
// RUN: %clang -Werror -fsyntax-only -x cuda-cpp-output -c %s
-int cudaConfigureCall(int, int);
+extern "C" int cudaConfigureCall(int, int);
+extern "C" int __cudaPushCallConfiguration(int, int);
+
__attribute__((global)) void kernel() {}
void func() {
Modified: cfe/trunk/test/SemaCUDA/Inputs/cuda.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/Inputs/cuda.h?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/test/SemaCUDA/Inputs/cuda.h (original)
+++ cfe/trunk/test/SemaCUDA/Inputs/cuda.h Thu Jan 31 13:34:03 2019
@@ -18,9 +18,17 @@ struct dim3 {
};
typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
-int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
- cudaStream_t stream = 0);
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
// Host- and device-side placement new overloads.
void *operator new(__SIZE_TYPE__, void *p) { return p; }
Modified: cfe/trunk/test/SemaCUDA/config-type.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/config-type.cu?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/test/SemaCUDA/config-type.cu (original)
+++ cfe/trunk/test/SemaCUDA/config-type.cu Thu Jan 31 13:34:03 2019
@@ -1,3 +1,7 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -target-sdk-version=8.0 -fsyntax-only -verify=legacy-launch %s
+// RUN: %clang_cc1 -target-sdk-version=9.2 -fsyntax-only -verify=new-launch %s
-void cudaConfigureCall(unsigned gridSize, unsigned blockSize); // expected-error {{must have scalar return type}}
+// legacy-launch-error at +1 {{must have scalar return type}}
+void cudaConfigureCall(unsigned gridSize, unsigned blockSize);
+// new-launch-error at +1 {{must have scalar return type}}
+void __cudaPushCallConfiguration(unsigned gridSize, unsigned blockSize);
Modified: cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h?rev=352799&r1=352798&r2=352799&view=diff
==============================================================================
--- cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h (original)
+++ cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h Thu Jan 31 13:34:03 2019
@@ -183,7 +183,9 @@ testing::AssertionResult matchesConditio
"typedef struct cudaStream *cudaStream_t;"
"int cudaConfigureCall(dim3 gridSize, dim3 blockSize,"
" size_t sharedSize = 0,"
- " cudaStream_t stream = 0);";
+ " cudaStream_t stream = 0);"
+ "extern \"C\" unsigned __cudaPushCallConfiguration("
+ " dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0);";
bool Found = false, DynamicFound = false;
MatchFinder Finder;
More information about the cfe-commits
mailing list