[cfe-dev] Clang and CUDA with C++11 features
Peter Colberg
peter at colberg.org
Wed Jun 13 21:06:42 PDT 2012
On Wed, Jun 13, 2012 at 11:28:48PM -0400, Peter Colberg wrote:
> The parser interprets the compressed C++11 template parameter syntax
> as a call to a CUDA kernel function. Is there a way to disable parsing
> of the CUDA call syntax <<< >>>? I would be using a C++ wrapper around
> cudaConfigureCall, cudaSetupArgument and cudaLaunch anyway.
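For reference, the collision I mean: with C++11 a nested template argument list may close with three consecutive '>' characters, and in CUDA mode the parser apparently reads that '>>>' as part of a kernel launch. A minimal, hypothetical example (untested):

    #include <vector>

    int main()
    {
        // the '>>>' closes three templates here; in CUDA mode it is
        // (mis)read as the closing token of a <<< >>> kernel launch
        std::vector<std::vector<std::vector<int>>> v;
        (void)v;
    }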
Hmmm, the old cudaLaunch trick does not seem to work:
#include <cuda_runtime.h>
#include <cstddef>  // offsetof
#include <cstdio>
#include <cstdlib>
#define CUDA_REQUIRE( x )                        \
    {                                            \
        cudaError_t err = (x);                   \
        if (err != cudaSuccess) {                \
            fprintf(                             \
                stderr                           \
              , "%s (%d): error: CUDA: %s\n"     \
              , __FILE__ , __LINE__              \
              , cudaGetErrorString(err)          \
            );                                   \
            exit(1);                             \
        }                                        \
    }
__attribute__((global)) void g1(int x, int* g_array)
{
    g_array[0] = x;
}
int main()
{
    int* g_array = 0;
    CUDA_REQUIRE( cudaMalloc(&g_array, sizeof(*g_array)) );
    CUDA_REQUIRE( cudaMemset(g_array, 0, sizeof(*g_array)) );

    int dev = -1;
    CUDA_REQUIRE( cudaGetDevice(&dev) );
    printf("Using CUDA device #%d\n", dev);

    // mirrors the kernel parameter list, so that offsetof() yields the
    // byte offsets expected by cudaSetupArgument
    struct arguments
    {
        int x;
        int* g_array;
    };

    int x = 42;
#ifdef USE_CUDA_CALL_SYNTAX
    g1<<<1, 1>>>(x, g_array);
#else
    CUDA_REQUIRE( cudaConfigureCall(1, 1) );
    CUDA_REQUIRE( cudaSetupArgument(&x, sizeof(x), offsetof(arguments, x)) );
    CUDA_REQUIRE( cudaSetupArgument(&g_array, sizeof(g_array), offsetof(arguments, g_array)) );
    CUDA_REQUIRE( cudaLaunch(reinterpret_cast<char const*>(&g1)) );
#endif
    CUDA_REQUIRE( cudaDeviceSynchronize() );

    int result = 0;
    CUDA_REQUIRE( cudaMemcpy(&result, g_array, sizeof(*g_array), cudaMemcpyDeviceToHost) );
    printf("42 == %d\n", result);
}
Compile with Clang using <<< >>> syntax:
clang++ -DUSE_CUDA_CALL_SYNTAX -I/usr/local/cuda-4.2/cuda/include -L/usr/local/cuda-4.2/cuda/lib64 -lcudart -o kernel-call kernel-call.cu
./kernel-call
Using CUDA device #0
42 == 0
Compile with Clang using manual cudaLaunch:
clang++ -I/usr/local/cuda-4.2/cuda/include -L/usr/local/cuda-4.2/cuda/lib64 -lcudart -o kernel-call kernel-call.cu
./kernel-call
Using CUDA device #0
kernel-call.cu (48): error: CUDA: invalid device function
Compile with nvcc using manual cudaLaunch:
nvcc -I/usr/local/cuda-4.2/cuda/include -L/usr/local/cuda-4.2/cuda/lib64 -lcudart -o kernel-call kernel-call.cu
./kernel-call
Using CUDA device #0
42 == 42
How does the glue between the host code and the GPU kernel work?
Could I somehow obtain a cudaLaunch-callable pointer to the GPU kernel?
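From what I can tell from nvcc's generated host code, the glue is a registration step that runs before main(): nvcc emits a host-side stub for each __global__ function and registers the stub's address together with the device symbol name, which is what makes cudaLaunch() on that address find the kernel. Roughly like the sketch below; the __cudaRegister* signatures and the mangled device name are approximate (from memory of crt/host_runtime.h), so treat it as an illustration only:

    // hypothetical sketch of the per-translation-unit glue nvcc emits
    extern "C" void** __cudaRegisterFatBinary(void* fatCubin);
    extern "C" void __cudaRegisterFunction(void** handle, char const* hostFun,
            char* deviceFun, char const* deviceName, int thread_limit,
            uint3* tid, uint3* bid, dim3* bDim, dim3* gDim, int* wSize);

    static void** __handle;

    static int __register_kernels()
    {
        // the real code passes the embedded fat binary (device code image)
        __handle = __cudaRegisterFatBinary(/* &__fatbin */ 0);
        // associates the *host* address of g1 with the *device* symbol name,
        // which is what lets cudaLaunch((char const*)&g1) resolve the kernel
        __cudaRegisterFunction(__handle, reinterpret_cast<char const*>(&g1),
                const_cast<char*>("_Z2g1iPi"), "_Z2g1iPi", -1, 0, 0, 0, 0, 0);
        return 0;
    }
    static int __registered = __register_kernels();

If Clang does not emit anything like this, the host address of g1 is never associated with any device code, which would explain the "invalid device function" error from the manual cudaLaunch path above.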
The other issue is the result of kernel execution. The kernel appears
not to write to the global device memory array. I guess this also
involves the NVPTX backend. Should I forward this to the llvm-dev
mailing list, or are the NVPTX developers reading cfe-dev as well?
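One thing I have not checked yet for the <<< >>> build: whether the launch itself fails silently or the kernel runs but never stores. Something like the following, right after the launch, should tell (a small addition to the program above, untested with Clang):

    g1<<<1, 1>>>(x, g_array);
    // cudaGetLastError() reports launch failures that the <<< >>> syntax
    // itself never surfaces; cudaDeviceSynchronize() catches errors that
    // only show up once the kernel has actually run
    CUDA_REQUIRE( cudaGetLastError() );
    CUDA_REQUIRE( cudaDeviceSynchronize() );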
Thanks,
Peter