<html>
    <head>
      <base href="http://llvm.org/bugs/" />
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - clang fails to emit LLVM for template CUDA kernels (device code with -fcuda-is-device)"
   href="http://llvm.org/bugs/show_bug.cgi?id=18778">18778</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>clang fails to emit LLVM for template CUDA kernels (device code with -fcuda-is-device)
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>clang
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>LLVM Codegen
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedclangbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>michael.haidl@uni-muenster.de
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvmbugs@cs.uiuc.edu
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>When compiling valid CUDA code to LLVM (clang++ -Xclang -fcuda-is-device -S
-emit-llvm -target nvptx64) clang fails to emit LLVM code on template kernels.
This may happen because the global attribute gets lost on template
instantiation. Non-template kernels compile fine and working LLVM code is
emitted. However, template device functions called from a global function are
correctly instantiated and emitted.

Code to reproduce:

#ifndef __CUDACC__

#include &lt;stddef.h&gt;

#define __constant__ __attribute__((constant))
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __host__ __attribute__((host))
#define __shared__ __attribute__((shared))

struct dim3 {
  unsigned x, y, z;
  __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x),
y(y), z(z) {}
}; 

typedef struct cudaStream *cudaStream_t;

int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
                      cudaStream_t stream = 0);
#endif

template &lt;typename T&gt;
__device__ int dev_f(T& a, float& b, double& c)
{
        a = a * b;
        b = b - c;
        c = a * c;
        return a;
}

template &lt;typename T&gt;
__global__ void kernel(T a, float b, double c)
{
        int result = dev_f&lt;T&gt;(a, b, c);
}

int main()
{
        kernel&lt;int&gt;&lt;&lt;&lt;dim3(1), dim3(1)&gt;&gt;&gt;(5, 0.7f, 12.34);
        return 0;
}</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>