[clang] 1eaad01 - [CUDA][HIP] Let lambda be host device by default
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 8 10:11:07 PDT 2020
Author: Yaxun (Sam) Liu
Date: 2020-07-08T13:10:26-04:00
New Revision: 1eaad01046c88be6bf65265a2bcc53db5a5b48d0
URL: https://github.com/llvm/llvm-project/commit/1eaad01046c88be6bf65265a2bcc53db5a5b48d0
DIFF: https://github.com/llvm/llvm-project/commit/1eaad01046c88be6bf65265a2bcc53db5a5b48d0.diff
LOG: [CUDA][HIP] Let lambda be host device by default
This patch let lambda be host device by default and adds diagnostics for
capturing host variable by reference in device lambda.
Differential Revision: https://reviews.llvm.org/D78655
Added:
clang/test/CodeGenCUDA/lambda.cu
clang/test/SemaCUDA/lambda.cu
Modified:
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/include/clang/Sema/Sema.h
clang/lib/Sema/SemaCUDA.cpp
clang/lib/Sema/SemaLambda.cpp
clang/test/SemaCUDA/Inputs/cuda.h
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e99fd3d1f105..c0d0acccb181 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7999,6 +7999,10 @@ def err_ref_bad_target : Error<
def err_ref_bad_target_global_initializer : Error<
"reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
"function %1 in global initializer">;
+def err_capture_bad_target : Error<
+ "capture host variable %0 by reference in device or host device lambda function">;
+def err_capture_bad_target_this_ptr : Error<
+ "capture host side class data member by this pointer in device or host device lambda function">;
def warn_kern_is_method : Extension<
"kernel function %0 is a member function; this may not be accepted by nvcc">,
InGroup<CudaCompat>;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 8ee7dd74712d..a540d788f4d8 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11828,12 +11828,13 @@ class Sema final {
/// - Otherwise, returns true without emitting any diagnostics.
bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee);
+ void CUDACheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture);
+
/// Set __device__ or __host__ __device__ attributes on the given lambda
/// operator() method.
///
- /// CUDA lambdas declared inside __device__ or __global__ functions inherit
- /// the __device__ attribute. Similarly, lambdas inside __host__ __device__
- /// functions become __host__ __device__ themselves.
+ /// CUDA lambdas by default is host device function unless it has explicit
+ /// host or device attribute.
void CUDASetLambdaAttrs(CXXMethodDecl *Method);
/// Finds a function in \p Matches with highest calling priority
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index f54d97a2a319..283a04683a32 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -17,6 +17,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
+#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
@@ -746,20 +747,58 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}
+// Check the wrong-sided reference capture of lambda for CUDA/HIP.
+// A lambda function may capture a stack variable by reference when it is
+// defined and uses the capture by reference when the lambda is called. When
+// the capture and use happen on
diff erent sides, the capture is invalid and
+// should be diagnosed.
+void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
+ const sema::Capture &Capture) {
+ // In host compilation we only need to check lambda functions emitted on host
+ // side. In such lambda functions, a reference capture is invalid only
+ // if the lambda structure is populated by a device function or kernel then
+ // is passed to and called by a host function. However that is impossible,
+ // since a device function or kernel can only call a device function, also a
+ // kernel cannot pass a lambda back to a host function since we cannot
+ // define a kernel argument type which can hold the lambda before the lambda
+ // itself is defined.
+ if (!LangOpts.CUDAIsDevice)
+ return;
+
+ // File-scope lambda can only do init captures for global variables, which
+ // results in passing by value for these global variables.
+ FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+ if (!Caller)
+ return;
+
+ // In device compilation, we only need to check lambda functions which are
+ // emitted on device side. For such lambdas, a reference capture is invalid
+ // only if the lambda structure is populated by a host function then passed
+ // to and called in a device function or kernel.
+ bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
+ bool CallerIsHost =
+ !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
+ bool ShouldCheck = CalleeIsDevice && CallerIsHost;
+ if (!ShouldCheck || !Capture.isReferenceCapture())
+ return;
+ auto DiagKind = DeviceDiagBuilder::K_Deferred;
+ if (Capture.isVariableCapture()) {
+ DeviceDiagBuilder(DiagKind, Capture.getLocation(),
+ diag::err_capture_bad_target, Callee, *this)
+ << Capture.getVariable();
+ } else if (Capture.isThisCapture()) {
+ DeviceDiagBuilder(DiagKind, Capture.getLocation(),
+ diag::err_capture_bad_target_this_ptr, Callee, *this);
+ }
+ return;
+}
+
void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
return;
- FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
- if (!CurFn)
- return;
- CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
- if (Target == CFT_Global || Target == CFT_Device) {
- Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
- } else if (Target == CFT_HostDevice) {
- Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
- Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
- }
+ Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
}
void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index e751a73957d0..657ed13f207a 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -990,8 +990,7 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
// Attributes on the lambda apply to the method.
ProcessDeclAttributes(CurScope, Method, ParamInfo);
- // CUDA lambdas get implicit attributes based on the scope in which they're
- // declared.
+ // CUDA lambdas get implicit host and device attributes.
if (getLangOpts().CUDA)
CUDASetLambdaAttrs(Method);
@@ -1780,6 +1779,9 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
BuildCaptureField(Class, From);
Captures.push_back(Capture);
CaptureInits.push_back(Init.get());
+
+ if (LangOpts.CUDA)
+ CUDACheckLambdaCapture(CallOperator, From);
}
Class->setCaptures(Captures);
diff --git a/clang/test/CodeGenCUDA/lambda.cu b/clang/test/CodeGenCUDA/lambda.cu
new file mode 100644
index 000000000000..8a639c90f266
--- /dev/null
+++ b/clang/test/CodeGenCUDA/lambda.cu
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: -triple x86_64-linux-gnu \
+// RUN: | FileCheck -check-prefix=HOST %s
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: | FileCheck -check-prefix=DEV %s
+
+#include "Inputs/cuda.h"
+
+// Device side kernel name.
+// HOST: @[[KERN_CAPTURE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_capturevEUlvE_EvT_\00"
+// HOST: @[[KERN_RESOLVE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_resolvevEUlvE_EvT_\00"
+
+// Check functions emitted for test_capture in host compilation.
+// Check lambda is not emitted in host compilation.
+// HOST-LABEL: define void @_Z12test_capturev
+// HOST: call void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
+// HOST-LABEL: define internal void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
+// HOST: call void @_Z16__device_stub__gIZ12test_capturevEUlvE_EvT_
+// HOST-NOT: define{{.*}}@_ZZ4mainENKUlvE_clEv
+
+// Check functions emitted for test_resolve in host compilation.
+// Check host version of template function 'overloaded' is emitted and called
+// by the lambda function.
+// HOST-LABEL: define void @_Z12test_resolvev
+// HOST: call void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_()
+// HOST-LABEL: define internal void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_
+// HOST: call void @_Z16__device_stub__gIZ12test_resolvevEUlvE_EvT_
+// HOST: call void @_ZZ12test_resolvevENKUlvE_clEv
+// HOST-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
+// HOST: call i32 @_Z10overloadedIiET_v
+// HOST-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
+// HOST: ret i32 2
+
+// Check kernel is registered with correct device side kernel name.
+// HOST: @__hipRegisterFunction({{.*}}@[[KERN_CAPTURE]]
+// HOST: @__hipRegisterFunction({{.*}}@[[KERN_RESOLVE]]
+
+// DEV: @a = addrspace(1) externally_initialized global i32 0
+
+// Check functions emitted for test_capture in device compilation.
+// Check lambda is emitted in device compilation and accessing device variable.
+// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_capturevEUlvE_EvT_
+// DEV: call void @_ZZ12test_capturevENKUlvE_clEv
+// DEV-LABEL: define internal void @_ZZ12test_capturevENKUlvE_clEv
+// DEV: store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*)
+
+// Check functions emitted for test_resolve in device compilation.
+// Check device version of template function 'overloaded' is emitted and called
+// by the lambda function.
+// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_resolvevEUlvE_EvT_
+// DEV: call void @_ZZ12test_resolvevENKUlvE_clEv
+// DEV-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
+// DEV: call i32 @_Z10overloadedIiET_v
+// DEV-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
+// DEV: ret i32 1
+
+__device__ int a;
+
+template<class T>
+__device__ T overloaded() { return 1; }
+
+template<class T>
+__host__ T overloaded() { return 2; }
+
+template<class F>
+__global__ void g(F f) { f(); }
+
+template<class F>
+void test_capture_helper(F f) { g<<<1,1>>>(f); }
+
+template<class F>
+void test_resolve_helper(F f) { g<<<1,1>>>(f); f(); }
+
+// Test capture of device variable in lambda function.
+void test_capture(void) {
+ test_capture_helper([](){ a = 1;});
+}
+
+// Test resolving host/device function in lambda function.
+// Callee should resolve to correct host/device function based on where
+// the lambda function is called, not where it is defined.
+void test_resolve(void) {
+ test_resolve_helper([](){ overloaded<int>();});
+}
diff --git a/clang/test/SemaCUDA/Inputs/cuda.h b/clang/test/SemaCUDA/Inputs/cuda.h
index 2600bfa9c470..901f8e0c17cf 100644
--- a/clang/test/SemaCUDA/Inputs/cuda.h
+++ b/clang/test/SemaCUDA/Inputs/cuda.h
@@ -17,6 +17,19 @@ struct dim3 {
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};
+#ifdef __HIP__
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#else
typedef struct cudaStream *cudaStream_t;
typedef enum cudaError {} cudaError_t;
@@ -29,6 +42,7 @@ extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
dim3 blockDim, void **args,
size_t sharedMem, cudaStream_t stream);
+#endif
// Host- and device-side placement new overloads.
void *operator new(__SIZE_TYPE__, void *p) { return p; }
diff --git a/clang/test/SemaCUDA/lambda.cu b/clang/test/SemaCUDA/lambda.cu
new file mode 100644
index 000000000000..6f305a683c00
--- /dev/null
+++ b/clang/test/SemaCUDA/lambda.cu
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify=com %s
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fcuda-is-device -verify=com,dev %s
+
+#include "Inputs/cuda.h"
+
+auto global_lambda = [] () { return 123; };
+
+template<class F>
+__global__ void kernel(F f) { f(); }
+// dev-note at -1 7{{called by 'kernel<(lambda}}
+
+__host__ __device__ void hd(int x);
+
+class A {
+ int b;
+public:
+ void test() {
+ [=](){ hd(b); }();
+
+ [&](){ hd(b); }();
+
+ kernel<<<1,1>>>([](){ hd(0); });
+
+ kernel<<<1,1>>>([=](){ hd(b); });
+ // dev-error at -1 {{capture host side class data member by this pointer in device or host device lambda function}}
+
+ kernel<<<1,1>>>([&](){ hd(b); });
+ // dev-error at -1 {{capture host side class data member by this pointer in device or host device lambda function}}
+
+ kernel<<<1,1>>>([&] __device__ (){ hd(b); });
+ // dev-error at -1 {{capture host side class data member by this pointer in device or host device lambda function}}
+
+ kernel<<<1,1>>>([&](){
+ auto f = [&]{ hd(b); };
+ // dev-error at -1 {{capture host side class data member by this pointer in device or host device lambda function}}
+ f();
+ });
+ }
+};
+
+int main(void) {
+ auto lambda_kernel = [&]__global__(){};
+ // com-error at -1 {{kernel function 'operator()' must be a free function or static member function}}
+
+ int b;
+ [&](){ hd(b); }();
+
+ [=, &b](){ hd(b); }();
+
+ kernel<<<1,1>>>(global_lambda);
+
+ kernel<<<1,1>>>([](){ hd(0); });
+
+ kernel<<<1,1>>>([=](){ hd(b); });
+
+ kernel<<<1,1>>>([b](){ hd(b); });
+
+ kernel<<<1,1>>>([&](){ hd(b); });
+ // dev-error at -1 {{capture host variable 'b' by reference in device or host device lambda function}}
+
+ kernel<<<1,1>>>([=, &b](){ hd(b); });
+ // dev-error at -1 {{capture host variable 'b' by reference in device or host device lambda function}}
+
+ kernel<<<1,1>>>([&, b](){ hd(b); });
+
+ kernel<<<1,1>>>([&](){
+ auto f = [&]{ hd(b); };
+ // dev-error at -1 {{capture host variable 'b' by reference in device or host device lambda function}}
+ f();
+ });
+
+ return 0;
+}
More information about the cfe-commits
mailing list