r284355 - [CUDA] Fix false-positive in known-emitted handling.
Justin Lebar via cfe-commits
cfe-commits at lists.llvm.org
Sun Oct 16 19:25:55 PDT 2016
Author: jlebar
Date: Sun Oct 16 21:25:55 2016
New Revision: 284355
URL: http://llvm.org/viewvc/llvm-project?rev=284355&view=rev
Log:
[CUDA] Fix false-positive in known-emitted handling.
Previously: When compiling for host, our constructed call graph went
*through* kernel calls. This meant that if we had
host calls kernel calls HD
we would incorrectly mark the HD function as known-emitted on the host
side, and thus perform host-side checks on it.
Fixing this exposed another issue, wherein when marking a function as
known-emitted, we also need to traverse the callgraph of its template,
because non-dependent calls are attached to a function's template, not
its instantiation.
Added:
cfe/trunk/test/SemaCUDA/trace-through-global.cu
Modified:
cfe/trunk/lib/Sema/SemaCUDA.cpp
Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=284355&r1=284354&r2=284355&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaCUDA.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp Sun Oct 16 21:25:55 2016
@@ -644,10 +644,16 @@ static void MarkKnownEmitted(Sema &S, Fu
S.CUDAKnownEmittedFns.insert(Caller);
EmitDeferredDiags(S, Caller);
- // Deferred diags are often emitted on the template itself, so emit those as
- // well.
- if (auto *Templ = Caller->getPrimaryTemplate())
- EmitDeferredDiags(S, Templ->getAsFunction());
+ // If this is a template instantiation, explore its callgraph as well:
+ // Non-dependent calls are part of the template's callgraph, while dependent
+ // calls are part of to the instantiation's call graph.
+ if (auto *Templ = Caller->getPrimaryTemplate()) {
+ FunctionDecl *TemplFD = Templ->getAsFunction();
+ if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
+ Seen.insert(TemplFD);
+ Worklist.push_back(TemplFD);
+ }
+ }
// Add all functions called by Caller to our worklist.
auto CGIt = S.CUDACallGraph.find(Caller);
@@ -676,11 +682,21 @@ bool Sema::CheckCUDACall(SourceLocation
if (!Caller)
return true;
+ // If the caller is known-emitted, mark the callee as known-emitted.
+ // Otherwise, mark the call in our call graph so we can traverse it later.
bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
if (CallerKnownEmitted)
MarkKnownEmitted(*this, Callee);
- else
- CUDACallGraph[Caller].insert(Callee);
+ else {
+ // If we have
+ // host fn calls kernel fn calls host+device,
+ // the HD function does not get instantiated on the host. We model this by
+ // omitting at the call to the kernel from the callgraph. This ensures
+ // that, when compiling for host, only HD functions actually called from the
+ // host get marked as known-emitted.
+ if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+ CUDACallGraph[Caller].insert(Callee);
+ }
CUDADiagBuilder::Kind DiagKind = [&] {
switch (IdentifyCUDAPreference(Caller, Callee)) {
Added: cfe/trunk/test/SemaCUDA/trace-through-global.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/trace-through-global.cu?rev=284355&view=auto
==============================================================================
--- cfe/trunk/test/SemaCUDA/trace-through-global.cu (added)
+++ cfe/trunk/test/SemaCUDA/trace-through-global.cu Sun Oct 16 21:25:55 2016
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+// Check that it's OK for kernels to call HD functions that call device-only
+// functions.
+
+#include "Inputs/cuda.h"
+
+__device__ void device_fn(int) {}
+// expected-note at -1 {{declared here}}
+// expected-note at -2 {{declared here}}
+
+inline __host__ __device__ int hd1() {
+ device_fn(0); // expected-error {{reference to __device__ function}}
+ return 0;
+}
+
+inline __host__ __device__ int hd2() {
+ // No error here because hd2 is only referenced from a kernel.
+ device_fn(0);
+ return 0;
+}
+
+inline __host__ __device__ void hd3(int) {
+ device_fn(0); // expected-error {{reference to __device__ function 'device_fn'}}
+}
+inline __host__ __device__ void hd3(double) {}
+
+inline __host__ __device__ void hd4(int) {}
+inline __host__ __device__ void hd4(double) {
+ device_fn(0); // No error; this function is never called.
+}
+
+__global__ void kernel(int) { hd2(); }
+
+template <typename T>
+void launch_kernel() {
+ kernel<<<0, 0>>>(T());
+ hd1();
+ hd3(T());
+}
+
+void host_fn() {
+ launch_kernel<int>();
+}
More information about the cfe-commits
mailing list